diff --git a/CREDITS b/CREDITS index 1f9f0f078b4a..53d11a46fd69 100644 --- a/CREDITS +++ b/CREDITS @@ -2515,11 +2515,9 @@ D: SLS distribution D: Initial implementation of VC's, pty's and select() N: Pavel Machek -E: pavel@ucw.cz +E: pavel@kernel.org P: 4096R/92DFCE96 4FA7 9EEF FCD4 C44F C585 B8C7 C060 2241 92DF CE96 -D: Softcursor for vga, hypertech cdrom support, vcsa bugfix, nbd, -D: sun4/330 port, capabilities for elf, speedup for rm on ext2, USB, -D: work on suspend-to-ram/disk, killing duplicates from ioctl32, +D: NBD, Sun4/330 port, USB, work on suspend-to-ram/disk, D: Altera SoCFPGA and Nokia N900 support. S: Czech Republic diff --git a/Documentation/ABI/testing/sysfs-driver-ufs b/Documentation/ABI/testing/sysfs-driver-ufs index 5fa6655aee84..ae0191295d29 100644 --- a/Documentation/ABI/testing/sysfs-driver-ufs +++ b/Documentation/ABI/testing/sysfs-driver-ufs @@ -1559,3 +1559,48 @@ Description: Symbol - HCMID. This file shows the UFSHCD manufacturer id. The Manufacturer ID is defined by JEDEC in JEDEC-JEP106. The file is read only. + +What: /sys/bus/platform/drivers/ufshcd/*/critical_health +What: /sys/bus/platform/devices/*.ufs/critical_health +Date: February 2025 +Contact: Avri Altman <avri.altman@wdc.com> +Description: Report the number of times a critical health event has been + reported by a UFS device. Further insight into the specific + issue can be gained by reading one of: bPreEOLInfo, + bDeviceLifeTimeEstA, bDeviceLifeTimeEstB, + bWriteBoosterBufferLifeTimeEst, and bRPMBLifeTimeEst. + + The file is read only. + +What: /sys/bus/platform/drivers/ufshcd/*/clkscale_enable +What: /sys/bus/platform/devices/*.ufs/clkscale_enable +Date: January 2025 +Contact: Ziqi Chen <quic_ziqichen@quicinc.com> +Description: + This attribute shows whether the UFS clock scaling is enabled or not. + And it can be used to enable/disable the clock scaling by writing + 1 or 0 to this attribute. + + The attribute is read/write. + +What: /sys/bus/platform/drivers/ufshcd/*/clkgate_enable +What: /sys/bus/platform/devices/*.ufs/clkgate_enable +Date: January 2025 +Contact: Ziqi Chen <quic_ziqichen@quicinc.com> +Description: + This attribute shows whether the UFS clock gating is enabled or not. + And it can be used to enable/disable the clock gating by writing + 1 or 0 to this attribute. + + The attribute is read/write. + +What: /sys/bus/platform/drivers/ufshcd/*/clkgate_delay_ms +What: /sys/bus/platform/devices/*.ufs/clkgate_delay_ms +Date: January 2025 +Contact: Ziqi Chen <quic_ziqichen@quicinc.com> +Description: + This attribute shows and sets the number of milliseconds of idle time + before the UFS driver starts to perform clock gating. This can + prevent the UFS from frequently performing clock gating/ungating. + + The attribute is read/write. diff --git a/Documentation/arch/powerpc/cxlflash.rst b/Documentation/arch/powerpc/cxlflash.rst deleted file mode 100644 index e8f488acfa41..000000000000 --- a/Documentation/arch/powerpc/cxlflash.rst +++ /dev/null @@ -1,433 +0,0 @@ -================================ -Coherent Accelerator (CXL) Flash -================================ - -Introduction -============ - - The IBM Power architecture provides support for CAPI (Coherent - Accelerator Power Interface), which is available to certain PCIe slots - on Power 8 systems. CAPI can be thought of as a special tunneling - protocol through PCIe that allow PCIe adapters to look like special - purpose co-processors which can read or write an application's - memory and generate page faults. As a result, the host interface to - an adapter running in CAPI mode does not require the data buffers to - be mapped to the device's memory (IOMMU bypass) nor does it require - memory to be pinned. - - On Linux, Coherent Accelerator (CXL) kernel services present CAPI - devices as a PCI device by implementing a virtual PCI host bridge. - This abstraction simplifies the infrastructure and programming - model, allowing for drivers to look similar to other native PCI - device drivers. - - CXL provides a mechanism by which user space applications can - directly talk to a device (network or storage) bypassing the typical - kernel/device driver stack. The CXL Flash Adapter Driver enables a - user space application direct access to Flash storage. - - The CXL Flash Adapter Driver is a kernel module that sits in the - SCSI stack as a low level device driver (below the SCSI disk and - protocol drivers) for the IBM CXL Flash Adapter. This driver is - responsible for the initialization of the adapter, setting up the - special path for user space access, and performing error recovery. It - communicates directly the Flash Accelerator Functional Unit (AFU) - as described in Documentation/arch/powerpc/cxl.rst. - - The cxlflash driver supports two, mutually exclusive, modes of - operation at the device (LUN) level: - - - Any flash device (LUN) can be configured to be accessed as a - regular disk device (i.e.: /dev/sdc). This is the default mode. - - - Any flash device (LUN) can be configured to be accessed from - user space with a special block library. This mode further - specifies the means of accessing the device and provides for - either raw access to the entire LUN (referred to as direct - or physical LUN access) or access to a kernel/AFU-mediated - partition of the LUN (referred to as virtual LUN access). The - segmentation of a disk device into virtual LUNs is assisted - by special translation services provided by the Flash AFU. - -Overview -======== - - The Coherent Accelerator Interface Architecture (CAIA) introduces a - concept of a master context. A master typically has special privileges - granted to it by the kernel or hypervisor allowing it to perform AFU - wide management and control. The master may or may not be involved - directly in each user I/O, but at the minimum is involved in the - initial setup before the user application is allowed to send requests - directly to the AFU. - - The CXL Flash Adapter Driver establishes a master context with the - AFU. It uses memory mapped I/O (MMIO) for this control and setup. The - Adapter Problem Space Memory Map looks like this:: - - +-------------------------------+ - | 512 * 64 KB User MMIO | - | (per context) | - | User Accessible | - +-------------------------------+ - | 512 * 128 B per context | - | Provisioning and Control | - | Trusted Process accessible | - +-------------------------------+ - | 64 KB Global | - | Trusted Process accessible | - +-------------------------------+ - - This driver configures itself into the SCSI software stack as an - adapter driver. The driver is the only entity that is considered a - Trusted Process to program the Provisioning and Control and Global - areas in the MMIO Space shown above. The master context driver - discovers all LUNs attached to the CXL Flash adapter and instantiates - scsi block devices (/dev/sdb, /dev/sdc etc.) for each unique LUN - seen from each path. - - Once these scsi block devices are instantiated, an application - written to a specification provided by the block library may get - access to the Flash from user space (without requiring a system call). - - This master context driver also provides a series of ioctls for this - block library to enable this user space access. The driver supports - two modes for accessing the block device. - - The first mode is called a virtual mode. In this mode a single scsi - block device (/dev/sdb) may be carved up into any number of distinct - virtual LUNs. The virtual LUNs may be resized as long as the sum of - the sizes of all the virtual LUNs, along with the meta-data associated - with it does not exceed the physical capacity. - - The second mode is called the physical mode. In this mode a single - block device (/dev/sdb) may be opened directly by the block library - and the entire space for the LUN is available to the application. - - Only the physical mode provides persistence of the data. i.e. The - data written to the block device will survive application exit and - restart and also reboot. The virtual LUNs do not persist (i.e. do - not survive after the application terminates or the system reboots). - - -Block library API -================= - - Applications intending to get access to the CXL Flash from user - space should use the block library, as it abstracts the details of - interfacing directly with the cxlflash driver that are necessary for - performing administrative actions (i.e.: setup, tear down, resize). - The block library can be thought of as a 'user' of services, - implemented as IOCTLs, that are provided by the cxlflash driver - specifically for devices (LUNs) operating in user space access - mode. While it is not a requirement that applications understand - the interface between the block library and the cxlflash driver, - a high-level overview of each supported service (IOCTL) is provided - below. - - The block library can be found on GitHub: - http://github.com/open-power/capiflash - - -CXL Flash Driver LUN IOCTLs -=========================== - - Users, such as the block library, that wish to interface with a flash - device (LUN) via user space access need to use the services provided - by the cxlflash driver. As these services are implemented as ioctls, - a file descriptor handle must first be obtained in order to establish - the communication channel between a user and the kernel. This file - descriptor is obtained by opening the device special file associated - with the scsi disk device (/dev/sdb) that was created during LUN - discovery. As per the location of the cxlflash driver within the - SCSI protocol stack, this open is actually not seen by the cxlflash - driver. Upon successful open, the user receives a file descriptor - (herein referred to as fd1) that should be used for issuing the - subsequent ioctls listed below. - - The structure definitions for these IOCTLs are available in: - uapi/scsi/cxlflash_ioctl.h - -DK_CXLFLASH_ATTACH ------------------- - - This ioctl obtains, initializes, and starts a context using the CXL - kernel services. These services specify a context id (u16) by which - to uniquely identify the context and its allocated resources. The - services additionally provide a second file descriptor (herein - referred to as fd2) that is used by the block library to initiate - memory mapped I/O (via mmap()) to the CXL flash device and poll for - completion events. This file descriptor is intentionally installed by - this driver and not the CXL kernel services to allow for intermediary - notification and access in the event of a non-user-initiated close(), - such as a killed process. This design point is described in further - detail in the description for the DK_CXLFLASH_DETACH ioctl. - - There are a few important aspects regarding the "tokens" (context id - and fd2) that are provided back to the user: - - - These tokens are only valid for the process under which they - were created. The child of a forked process cannot continue - to use the context id or file descriptor created by its parent - (see DK_CXLFLASH_VLUN_CLONE for further details). - - - These tokens are only valid for the lifetime of the context and - the process under which they were created. Once either is - destroyed, the tokens are to be considered stale and subsequent - usage will result in errors. - - - A valid adapter file descriptor (fd2 >= 0) is only returned on - the initial attach for a context. Subsequent attaches to an - existing context (DK_CXLFLASH_ATTACH_REUSE_CONTEXT flag present) - do not provide the adapter file descriptor as it was previously - made known to the application. - - - When a context is no longer needed, the user shall detach from - the context via the DK_CXLFLASH_DETACH ioctl. When this ioctl - returns with a valid adapter file descriptor and the return flag - DK_CXLFLASH_APP_CLOSE_ADAP_FD is present, the application _must_ - close the adapter file descriptor following a successful detach. - - - When this ioctl returns with a valid fd2 and the return flag - DK_CXLFLASH_APP_CLOSE_ADAP_FD is present, the application _must_ - close fd2 in the following circumstances: - - + Following a successful detach of the last user of the context - + Following a successful recovery on the context's original fd2 - + In the child process of a fork(), following a clone ioctl, - on the fd2 associated with the source context - - - At any time, a close on fd2 will invalidate the tokens. Applications - should exercise caution to only close fd2 when appropriate (outlined - in the previous bullet) to avoid premature loss of I/O. - -DK_CXLFLASH_USER_DIRECT ------------------------ - This ioctl is responsible for transitioning the LUN to direct - (physical) mode access and configuring the AFU for direct access from - user space on a per-context basis. Additionally, the block size and - last logical block address (LBA) are returned to the user. - - As mentioned previously, when operating in user space access mode, - LUNs may be accessed in whole or in part. Only one mode is allowed - at a time and if one mode is active (outstanding references exist), - requests to use the LUN in a different mode are denied. - - The AFU is configured for direct access from user space by adding an - entry to the AFU's resource handle table. The index of the entry is - treated as a resource handle that is returned to the user. The user - is then able to use the handle to reference the LUN during I/O. - -DK_CXLFLASH_USER_VIRTUAL ------------------------- - This ioctl is responsible for transitioning the LUN to virtual mode - of access and configuring the AFU for virtual access from user space - on a per-context basis. Additionally, the block size and last logical - block address (LBA) are returned to the user. - - As mentioned previously, when operating in user space access mode, - LUNs may be accessed in whole or in part. Only one mode is allowed - at a time and if one mode is active (outstanding references exist), - requests to use the LUN in a different mode are denied. - - The AFU is configured for virtual access from user space by adding - an entry to the AFU's resource handle table. The index of the entry - is treated as a resource handle that is returned to the user. The - user is then able to use the handle to reference the LUN during I/O. - - By default, the virtual LUN is created with a size of 0. The user - would need to use the DK_CXLFLASH_VLUN_RESIZE ioctl to adjust the grow - the virtual LUN to a desired size. To avoid having to perform this - resize for the initial creation of the virtual LUN, the user has the - option of specifying a size as part of the DK_CXLFLASH_USER_VIRTUAL - ioctl, such that when success is returned to the user, the - resource handle that is provided is already referencing provisioned - storage. This is reflected by the last LBA being a non-zero value. - - When a LUN is accessible from more than one port, this ioctl will - return with the DK_CXLFLASH_ALL_PORTS_ACTIVE return flag set. This - provides the user with a hint that I/O can be retried in the event - of an I/O error as the LUN can be reached over multiple paths. - -DK_CXLFLASH_VLUN_RESIZE ------------------------ - This ioctl is responsible for resizing a previously created virtual - LUN and will fail if invoked upon a LUN that is not in virtual - mode. Upon success, an updated last LBA is returned to the user - indicating the new size of the virtual LUN associated with the - resource handle. - - The partitioning of virtual LUNs is jointly mediated by the cxlflash - driver and the AFU. An allocation table is kept for each LUN that is - operating in the virtual mode and used to program a LUN translation - table that the AFU references when provided with a resource handle. - - This ioctl can return -EAGAIN if an AFU sync operation takes too long. - In addition to returning a failure to user, cxlflash will also schedule - an asynchronous AFU reset. Should the user choose to retry the operation, - it is expected to succeed. If this ioctl fails with -EAGAIN, the user - can either retry the operation or treat it as a failure. - -DK_CXLFLASH_RELEASE -------------------- - This ioctl is responsible for releasing a previously obtained - reference to either a physical or virtual LUN. This can be - thought of as the inverse of the DK_CXLFLASH_USER_DIRECT or - DK_CXLFLASH_USER_VIRTUAL ioctls. Upon success, the resource handle - is no longer valid and the entry in the resource handle table is - made available to be used again. - - As part of the release process for virtual LUNs, the virtual LUN - is first resized to 0 to clear out and free the translation tables - associated with the virtual LUN reference. - -DK_CXLFLASH_DETACH ------------------- - This ioctl is responsible for unregistering a context with the - cxlflash driver and release outstanding resources that were - not explicitly released via the DK_CXLFLASH_RELEASE ioctl. Upon - success, all "tokens" which had been provided to the user from the - DK_CXLFLASH_ATTACH onward are no longer valid. - - When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful - attach, the application _must_ close the fd2 associated with the context - following the detach of the final user of the context. - -DK_CXLFLASH_VLUN_CLONE ----------------------- - This ioctl is responsible for cloning a previously created - context to a more recently created context. It exists solely to - support maintaining user space access to storage after a process - forks. Upon success, the child process (which invoked the ioctl) - will have access to the same LUNs via the same resource handle(s) - as the parent, but under a different context. - - Context sharing across processes is not supported with CXL and - therefore each fork must be met with establishing a new context - for the child process. This ioctl simplifies the state management - and playback required by a user in such a scenario. When a process - forks, child process can clone the parents context by first creating - a context (via DK_CXLFLASH_ATTACH) and then using this ioctl to - perform the clone from the parent to the child. - - The clone itself is fairly simple. The resource handle and lun - translation tables are copied from the parent context to the child's - and then synced with the AFU. - - When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful - attach, the application _must_ close the fd2 associated with the source - context (still resident/accessible in the parent process) following the - clone. This is to avoid a stale entry in the file descriptor table of the - child process. - - This ioctl can return -EAGAIN if an AFU sync operation takes too long. - In addition to returning a failure to user, cxlflash will also schedule - an asynchronous AFU reset. Should the user choose to retry the operation, - it is expected to succeed. If this ioctl fails with -EAGAIN, the user - can either retry the operation or treat it as a failure. - -DK_CXLFLASH_VERIFY ------------------- - This ioctl is used to detect various changes such as the capacity of - the disk changing, the number of LUNs visible changing, etc. In cases - where the changes affect the application (such as a LUN resize), the - cxlflash driver will report the changed state to the application. - - The user calls in when they want to validate that a LUN hasn't been - changed in response to a check condition. As the user is operating out - of band from the kernel, they will see these types of events without - the kernel's knowledge. When encountered, the user's architected - behavior is to call in to this ioctl, indicating what they want to - verify and passing along any appropriate information. For now, only - verifying a LUN change (ie: size different) with sense data is - supported. - -DK_CXLFLASH_RECOVER_AFU ------------------------ - This ioctl is used to drive recovery (if such an action is warranted) - of a specified user context. Any state associated with the user context - is re-established upon successful recovery. - - User contexts are put into an error condition when the device needs to - be reset or is terminating. Users are notified of this error condition - by seeing all 0xF's on an MMIO read. Upon encountering this, the - architected behavior for a user is to call into this ioctl to recover - their context. A user may also call into this ioctl at any time to - check if the device is operating normally. If a failure is returned - from this ioctl, the user is expected to gracefully clean up their - context via release/detach ioctls. Until they do, the context they - hold is not relinquished. The user may also optionally exit the process - at which time the context/resources they held will be freed as part of - the release fop. - - When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful - attach, the application _must_ unmap and close the fd2 associated with the - original context following this ioctl returning success and indicating that - the context was recovered (DK_CXLFLASH_RECOVER_AFU_CONTEXT_RESET). - -DK_CXLFLASH_MANAGE_LUN ----------------------- - This ioctl is used to switch a LUN from a mode where it is available - for file-system access (legacy), to a mode where it is set aside for - exclusive user space access (superpipe). In case a LUN is visible - across multiple ports and adapters, this ioctl is used to uniquely - identify each LUN by its World Wide Node Name (WWNN). - - -CXL Flash Driver Host IOCTLs -============================ - - Each host adapter instance that is supported by the cxlflash driver - has a special character device associated with it to enable a set of - host management function. These character devices are hosted in a - class dedicated for cxlflash and can be accessed via `/dev/cxlflash/*`. - - Applications can be written to perform various functions using the - host ioctl APIs below. - - The structure definitions for these IOCTLs are available in: - uapi/scsi/cxlflash_ioctl.h - -HT_CXLFLASH_LUN_PROVISION -------------------------- - This ioctl is used to create and delete persistent LUNs on cxlflash - devices that lack an external LUN management interface. It is only - valid when used with AFUs that support the LUN provision capability. - - When sufficient space is available, LUNs can be created by specifying - the target port to host the LUN and a desired size in 4K blocks. Upon - success, the LUN ID and WWID of the created LUN will be returned and - the SCSI bus can be scanned to detect the change in LUN topology. Note - that partial allocations are not supported. Should a creation fail due - to a space issue, the target port can be queried for its current LUN - geometry. - - To remove a LUN, the device must first be disassociated from the Linux - SCSI subsystem. The LUN deletion can then be initiated by specifying a - target port and LUN ID. Upon success, the LUN geometry associated with - the port will be updated to reflect new number of provisioned LUNs and - available capacity. - - To query the LUN geometry of a port, the target port is specified and - upon success, the following information is presented: - - - Maximum number of provisioned LUNs allowed for the port - - Current number of provisioned LUNs for the port - - Maximum total capacity of provisioned LUNs for the port (4K blocks) - - Current total capacity of provisioned LUNs for the port (4K blocks) - - With this information, the number of available LUNs and capacity can be - can be calculated. - -HT_CXLFLASH_AFU_DEBUG ---------------------- - This ioctl is used to debug AFUs by supporting a command pass-through - interface. It is only valid when used with AFUs that support the AFU - debug capability. - - With exception of buffer management, AFU debug commands are opaque to - cxlflash and treated as pass-through. For debug commands that do require - data transfer, the user supplies an adequately sized data buffer and must - specify the data transfer direction with respect to the host. There is a - maximum transfer size of 256K imposed. Note that partial read completions - are not supported - when errors are experienced with a host read data - transfer, the data buffer is not copied back to the user. diff --git a/Documentation/arch/powerpc/index.rst b/Documentation/arch/powerpc/index.rst index 9749f6dc258f..995268530f21 100644 --- a/Documentation/arch/powerpc/index.rst +++ b/Documentation/arch/powerpc/index.rst @@ -13,7 +13,6 @@ powerpc cpu_families cpu_features cxl - cxlflash dawr-power9 dexcr dscr diff --git a/Documentation/devicetree/bindings/interrupt-controller/microchip,lan966x-oic.yaml b/Documentation/devicetree/bindings/interrupt-controller/microchip,lan966x-oic.yaml index b2adc7174177..dca16e202da9 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/microchip,lan966x-oic.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/microchip,lan966x-oic.yaml @@ -14,9 +14,8 @@ allOf: description: | The Microchip LAN966x outband interrupt controller (OIC) maps the internal - interrupt sources of the LAN966x device to an external interrupt. - When the LAN966x device is used as a PCI device, the external interrupt is - routed to the PCI interrupt. + interrupt sources of the LAN966x device to a PCI interrupt when the LAN966x + device is used as a PCI device. properties: compatible: diff --git a/Documentation/devicetree/bindings/ufs/rockchip,rk3576-ufshc.yaml b/Documentation/devicetree/bindings/ufs/rockchip,rk3576-ufshc.yaml new file mode 100644 index 000000000000..c7d17cf4dc42 --- /dev/null +++ b/Documentation/devicetree/bindings/ufs/rockchip,rk3576-ufshc.yaml @@ -0,0 +1,105 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/ufs/rockchip,rk3576-ufshc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Rockchip UFS Host Controller + +maintainers: + - Shawn Lin <shawn.lin@rock-chips.com> + +allOf: + - $ref: ufs-common.yaml + +properties: + compatible: + const: rockchip,rk3576-ufshc + + reg: + maxItems: 5 + + reg-names: + items: + - const: hci + - const: mphy + - const: hci_grf + - const: mphy_grf + - const: hci_apb + + clocks: + maxItems: 4 + + clock-names: + items: + - const: core + - const: pclk + - const: pclk_mphy + - const: ref_out + + power-domains: + maxItems: 1 + + resets: + maxItems: 4 + + reset-names: + items: + - const: biu + - const: sys + - const: ufs + - const: grf + + reset-gpios: + maxItems: 1 + description: | + GPIO specifiers for host to reset the whole UFS device including PHY and + memory. This gpio is active low and should choose the one whose high output + voltage is lower than 1.5V based on the UFS spec. + +required: + - compatible + - reg + - reg-names + - clocks + - clock-names + - interrupts + - power-domains + - resets + - reset-names + - reset-gpios + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/clock/rockchip,rk3576-cru.h> + #include <dt-bindings/reset/rockchip,rk3576-cru.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> + #include <dt-bindings/power/rockchip,rk3576-power.h> + #include <dt-bindings/pinctrl/rockchip.h> + #include <dt-bindings/gpio/gpio.h> + + soc { + #address-cells = <2>; + #size-cells = <2>; + + ufshc: ufshc@2a2d0000 { + compatible = "rockchip,rk3576-ufshc"; + reg = <0x0 0x2a2d0000 0x0 0x10000>, + <0x0 0x2b040000 0x0 0x10000>, + <0x0 0x2601f000 0x0 0x1000>, + <0x0 0x2603c000 0x0 0x1000>, + <0x0 0x2a2e0000 0x0 0x10000>; + reg-names = "hci", "mphy", "hci_grf", "mphy_grf", "hci_apb"; + clocks = <&cru ACLK_UFS_SYS>, <&cru PCLK_USB_ROOT>, <&cru PCLK_MPHY>, + <&cru CLK_REF_UFS_CLKOUT>; + clock-names = "core", "pclk", "pclk_mphy", "ref_out"; + interrupts = <GIC_SPI 361 IRQ_TYPE_LEVEL_HIGH>; + power-domains = <&power RK3576_PD_USB>; + resets = <&cru SRST_A_UFS_BIU>, <&cru SRST_A_UFS_SYS>, <&cru SRST_A_UFS>, + <&cru SRST_P_UFS_GRF>; + reset-names = "biu", "sys", "ufs", "grf"; + reset-gpios = <&gpio4 RK_PD0 GPIO_ACTIVE_LOW>; + }; + }; diff --git a/Documentation/filesystems/bcachefs/SubmittingPatches.rst b/Documentation/filesystems/bcachefs/SubmittingPatches.rst new file mode 100644 index 000000000000..026b12ae0d6a --- /dev/null +++ b/Documentation/filesystems/bcachefs/SubmittingPatches.rst @@ -0,0 +1,98 @@ +Submitting patches to bcachefs: +=============================== + +Patches must be tested before being submitted, either with the xfstests suite +[0], or the full bcachefs test suite in ktest [1], depending on what's being +touched. Note that ktest wraps xfstests and will be an easier method to running +it for most users; it includes single-command wrappers for all the mainstream +in-kernel local filesystems. + +Patches will undergo more testing after being merged (including +lockdep/kasan/preempt/etc. variants), these are not generally required to be +run by the submitter - but do put some thought into what you're changing and +which tests might be relevant, e.g. are you dealing with tricky memory layout +work? kasan, are you doing locking work? then lockdep; and ktest includes +single-command variants for the debug build types you'll most likely need. + +The exception to this rule is incomplete WIP/RFC patches: if you're working on +something nontrivial, it's encouraged to send out a WIP patch to let people +know what you're doing and make sure you're on the right track. Just make sure +it includes a brief note as to what's done and what's incomplete, to avoid +confusion. + +Rigorous checkpatch.pl adherence is not required (many of its warnings are +considered out of date), but try not to deviate too much without reason. + +Focus on writing code that reads well and is organized well; code should be +aesthetically pleasing. + +CI: +=== + +Instead of running your tests locally, when running the full test suite it's +prefereable to let a server farm do it in parallel, and then have the results +in a nice test dashboard (which can tell you which failures are new, and +presents results in a git log view, avoiding the need for most bisecting). + +That exists [2], and community members may request an account. If you work for +a big tech company, you'll need to help out with server costs to get access - +but the CI is not restricted to running bcachefs tests: it runs any ktest test +(which generally makes it easy to wrap other tests that can run in qemu). + +Other things to think about: +============================ + +- How will we debug this code? Is there sufficient introspection to diagnose + when something starts acting wonky on a user machine? + + We don't necessarily need every single field of every data structure visible + with introspection, but having the important fields of all the core data + types wired up makes debugging drastically easier - a bit of thoughtful + foresight greatly reduces the need to have people build custom kernels with + debug patches. + + More broadly, think about all the debug tooling that might be needed. + +- Does it make the codebase more or less of a mess? Can we also try to do some + organizing, too? + +- Do new tests need to be written? New assertions? How do we know and verify + that the code is correct, and what happens if something goes wrong? + + We don't yet have automated code coverage analysis or easy fault injection - + but for now, pretend we did and ask what they might tell us. + + Assertions are hugely important, given that we don't yet have a systems + language that can do ergonomic embedded correctness proofs. Hitting an assert + in testing is much better than wandering off into undefined behaviour la-la + land - use them. Use them judiciously, and not as a replacement for proper + error handling, but use them. + +- Does it need to be performance tested? Should we add new peformance counters? + + bcachefs has a set of persistent runtime counters which can be viewed with + the 'bcachefs fs top' command; this should give users a basic idea of what + their filesystem is currently doing. If you're doing a new feature or looking + at old code, think if anything should be added. + +- If it's a new on disk format feature - have upgrades and downgrades been + tested? (Automated tests exists but aren't in the CI, due to the hassle of + disk image management; coordinate to have them run.) + +Mailing list, IRC: +================== + +Patches should hit the list [3], but much discussion and code review happens on +IRC as well [4]; many people appreciate the more conversational approach and +quicker feedback. + +Additionally, we have a lively user community doing excellent QA work, which +exists primarily on IRC. Please make use of that resource; user feedback is +important for any nontrivial feature, and documenting it in commit messages +would be a good idea. + +[0]: git://git.kernel.org/pub/scm/fs/xfs/xfstests-dev.git +[1]: https://evilpiepirate.org/git/ktest.git/ +[2]: https://evilpiepirate.org/~testdashboard/ci/ +[3]: linux-bcachefs@vger.kernel.org +[4]: irc.oftc.net#bcache, #bcachefs-dev diff --git a/Documentation/filesystems/bcachefs/index.rst b/Documentation/filesystems/bcachefs/index.rst index 95fc4b90739e..7db4d7ceab58 100644 --- a/Documentation/filesystems/bcachefs/index.rst +++ b/Documentation/filesystems/bcachefs/index.rst @@ -9,4 +9,5 @@ bcachefs Documentation :numbered: CodingStyle + SubmittingPatches errorcodes diff --git a/Documentation/scsi/st.rst b/Documentation/scsi/st.rst index d3b28c28d74c..b4a092faa9c8 100644 --- a/Documentation/scsi/st.rst +++ b/Documentation/scsi/st.rst @@ -157,6 +157,11 @@ enabled driver and mode options. The value in the file is a bit mask where the bit definitions are the same as those used with MTSETDRVBUFFER in setting the options. +Each directory contains the entry 'position_lost_in_reset'. If this value is +one, reading and writing to the device is blocked after device reset. Most +devices rewind the tape after reset and the writes/read don't access the +tape position the user expects. + A link named 'tape' is made from the SCSI device directory to the class directory corresponding to the mode 0 auto-rewind device (e.g., st0). diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index 6d1465315df3..efe133c50615 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -373,7 +373,7 @@ Code Seq# Include File Comments 0xC0 00-0F linux/usb/iowarrior.h 0xCA 00-0F uapi/misc/cxl.h 0xCA 10-2F uapi/misc/ocxl.h -0xCA 80-BF uapi/scsi/cxlflash_ioctl.h +0xCA 80-BF uapi/scsi/cxlflash_ioctl.h Dead since 6.14 0xCB 00-1F CBM serial IEC bus in development: <mailto:michael.klein@puffin.lb.shuttle.de> 0xCC 00-0F drivers/misc/ibmvmc.h pseries VMC driver diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 0d1c3a820ce6..2b52eb77e29c 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1419,7 +1419,7 @@ fetch) is injected in the guest. S390: ^^^^^ -Returns -EINVAL if the VM has the KVM_VM_S390_UCONTROL flag set. +Returns -EINVAL or -EEXIST if the VM has the KVM_VM_S390_UCONTROL flag set. Returns -EINVAL if called on a protected VM. 4.36 KVM_SET_TSS_ADDR diff --git a/MAINTAINERS b/MAINTAINERS index 896a307fa065..ac8b15cec513 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2209,7 +2209,6 @@ F: sound/soc/codecs/cs42l84.* F: sound/soc/codecs/ssm3515.c ARM/APPLE MACHINE SUPPORT -M: Hector Martin <marcan@marcan.st> M: Sven Peter <sven@svenpeter.dev> R: Alyssa Rosenzweig <alyssa@rosenzweig.io> L: asahi@lists.linux.dev @@ -3955,6 +3954,7 @@ M: Kent Overstreet <kent.overstreet@linux.dev> L: linux-bcachefs@vger.kernel.org S: Supported C: irc://irc.oftc.net/bcache +P: Documentation/filesystems/bcachefs/SubmittingPatches.rst T: git https://evilpiepirate.org/git/bcachefs.git F: fs/bcachefs/ F: Documentation/filesystems/bcachefs/ @@ -6318,15 +6318,6 @@ F: drivers/misc/cxl/ F: include/misc/cxl* F: include/uapi/misc/cxl.h -CXLFLASH (IBM Coherent Accelerator Processor Interface CAPI Flash) SCSI DRIVER -M: Manoj N. Kumar <manoj@linux.ibm.com> -M: Uma Krishnan <ukrishn@linux.ibm.com> -L: linux-scsi@vger.kernel.org -S: Obsolete -F: Documentation/arch/powerpc/cxlflash.rst -F: drivers/scsi/cxlflash/ -F: include/uapi/scsi/cxlflash_ioctl.h - CYBERPRO FB DRIVER M: Russell King <linux@armlinux.org.uk> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@ -9418,7 +9409,7 @@ F: fs/freevxfs/ FREEZER M: "Rafael J. Wysocki" <rafael@kernel.org> -M: Pavel Machek <pavel@ucw.cz> +M: Pavel Machek <pavel@kernel.org> L: linux-pm@vger.kernel.org S: Supported F: Documentation/power/freezing-of-tasks.rst @@ -9878,7 +9869,7 @@ S: Maintained F: drivers/staging/gpib/ GPIO ACPI SUPPORT -M: Mika Westerberg <mika.westerberg@linux.intel.com> +M: Mika Westerberg <westeri@kernel.org> M: Andy Shevchenko <andriy.shevchenko@linux.intel.com> L: linux-gpio@vger.kernel.org L: linux-acpi@vger.kernel.org @@ -10253,7 +10244,7 @@ F: drivers/video/fbdev/hgafb.c HIBERNATION (aka Software Suspend, aka swsusp) M: "Rafael J. Wysocki" <rafael@kernel.org> -M: Pavel Machek <pavel@ucw.cz> +M: Pavel Machek <pavel@kernel.org> L: linux-pm@vger.kernel.org S: Supported B: https://bugzilla.kernel.org @@ -13124,8 +13115,8 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/har F: scripts/leaking_addresses.pl LED SUBSYSTEM -M: Pavel Machek <pavel@ucw.cz> M: Lee Jones <lee@kernel.org> +M: Pavel Machek <pavel@kernel.org> L: linux-leds@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/lee/leds.git @@ -16462,6 +16453,22 @@ F: include/net/dsa.h F: net/dsa/ F: tools/testing/selftests/drivers/net/dsa/ +NETWORKING [ETHTOOL] +M: Andrew Lunn <andrew@lunn.ch> +M: Jakub Kicinski <kuba@kernel.org> +F: Documentation/netlink/specs/ethtool.yaml +F: Documentation/networking/ethtool-netlink.rst +F: include/linux/ethtool* +F: include/uapi/linux/ethtool* +F: net/ethtool/ +F: tools/testing/selftests/drivers/net/*/ethtool* + +NETWORKING [ETHTOOL CABLE TEST] +M: Andrew Lunn <andrew@lunn.ch> +F: net/ethtool/cabletest.c +F: tools/testing/selftests/drivers/net/*/ethtool* +K: cable_test + NETWORKING [GENERAL] M: "David S. Miller" <davem@davemloft.net> M: Eric Dumazet <edumazet@google.com> @@ -16621,6 +16628,7 @@ F: tools/testing/selftests/net/mptcp/ NETWORKING [TCP] M: Eric Dumazet <edumazet@google.com> M: Neal Cardwell <ncardwell@google.com> +R: Kuniyuki Iwashima <kuniyu@amazon.com> L: netdev@vger.kernel.org S: Maintained F: Documentation/networking/net_cachelines/tcp_sock.rst @@ -16648,6 +16656,31 @@ F: include/net/tls.h F: include/uapi/linux/tls.h F: net/tls/* +NETWORKING [SOCKETS] +M: Eric Dumazet <edumazet@google.com> +M: Kuniyuki Iwashima <kuniyu@amazon.com> +M: Paolo Abeni <pabeni@redhat.com> +M: Willem de Bruijn <willemb@google.com> +S: Maintained +F: include/linux/sock_diag.h +F: include/linux/socket.h +F: include/linux/sockptr.h +F: include/net/sock.h +F: include/net/sock_reuseport.h +F: include/uapi/linux/socket.h +F: net/core/*sock* +F: net/core/scm.c +F: net/socket.c + +NETWORKING [UNIX SOCKETS] +M: Kuniyuki Iwashima <kuniyu@amazon.com> +S: Maintained +F: include/net/af_unix.h +F: include/net/netns/unix.h +F: include/uapi/linux/unix_diag.h +F: net/unix/ +F: tools/testing/selftests/net/af_unix/ + NETXEN (1/10) GbE SUPPORT M: Manish Chopra <manishc@marvell.com> M: Rahul Verma <rahulv@marvell.com> @@ -16781,7 +16814,7 @@ F: include/linux/tick.h F: kernel/time/tick*.* NOKIA N900 CAMERA SUPPORT (ET8EK8 SENSOR, AD5820 FOCUS) -M: Pavel Machek <pavel@ucw.cz> +M: Pavel Machek <pavel@kernel.org> M: Sakari Ailus <sakari.ailus@iki.fi> L: linux-media@vger.kernel.org S: Maintained @@ -17713,6 +17746,7 @@ L: netdev@vger.kernel.org L: dev@openvswitch.org S: Maintained W: http://openvswitch.org +F: Documentation/networking/openvswitch.rst F: include/uapi/linux/openvswitch.h F: net/openvswitch/ F: tools/testing/selftests/net/openvswitch/ @@ -22806,7 +22840,7 @@ F: drivers/sh/ SUSPEND TO RAM M: "Rafael J. Wysocki" <rafael@kernel.org> M: Len Brown <len.brown@intel.com> -M: Pavel Machek <pavel@ucw.cz> +M: Pavel Machek <pavel@kernel.org> L: linux-pm@vger.kernel.org S: Supported B: https://bugzilla.kernel.org diff --git a/Makefile b/Makefile index 9e0d63d9d94b..89628e354ca7 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Baby Opossum Posse # *DOCUMENTATION* diff --git a/arch/alpha/include/asm/elf.h b/arch/alpha/include/asm/elf.h index 4d7c46f50382..50c82187e60e 100644 --- a/arch/alpha/include/asm/elf.h +++ b/arch/alpha/include/asm/elf.h @@ -74,7 +74,7 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; /* * This is used to ensure we don't load something for the wrong architecture. */ -#define elf_check_arch(x) ((x)->e_machine == EM_ALPHA) +#define elf_check_arch(x) (((x)->e_machine == EM_ALPHA) && !((x)->e_flags & EF_ALPHA_32BIT)) /* * These are used to set parameters in the core dumps. @@ -137,10 +137,6 @@ extern int dump_elf_task(elf_greg_t *dest, struct task_struct *task); : amask (AMASK_CIX) ? "ev6" : "ev67"); \ }) -#define SET_PERSONALITY(EX) \ - set_personality(((EX).e_flags & EF_ALPHA_32BIT) \ - ? PER_LINUX_32BIT : PER_LINUX) - extern int alpha_l1i_cacheshape; extern int alpha_l1d_cacheshape; extern int alpha_l2_cacheshape; diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index 635f0a5f5bbd..02e8817a8921 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -360,7 +360,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) extern void paging_init(void); -/* We have our own get_unmapped_area to cope with ADDR_LIMIT_32BIT. */ +/* We have our own get_unmapped_area */ #define HAVE_ARCH_UNMAPPED_AREA #endif /* _ALPHA_PGTABLE_H */ diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h index 55bb1c09fd39..5dce5518a211 100644 --- a/arch/alpha/include/asm/processor.h +++ b/arch/alpha/include/asm/processor.h @@ -8,23 +8,19 @@ #ifndef __ASM_ALPHA_PROCESSOR_H #define __ASM_ALPHA_PROCESSOR_H -#include <linux/personality.h> /* for ADDR_LIMIT_32BIT */ - /* * We have a 42-bit user address space: 4TB user VM... */ #define TASK_SIZE (0x40000000000UL) -#define STACK_TOP \ - (current->personality & ADDR_LIMIT_32BIT ? 0x80000000 : 0x00120000000UL) +#define STACK_TOP (0x00120000000UL) #define STACK_TOP_MAX 0x00120000000UL /* This decides where the kernel will search for a free chunk of vm * space during mmap's. */ -#define TASK_UNMAPPED_BASE \ - ((current->personality & ADDR_LIMIT_32BIT) ? 0x40000000 : TASK_SIZE / 2) +#define TASK_UNMAPPED_BASE (TASK_SIZE / 2) /* This is dead. Everything has been moved to thread_info. */ struct thread_struct { }; diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 86185021f75a..a08e8edef1a4 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1210,8 +1210,7 @@ SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p) return ret; } -/* Get an address range which is currently unmapped. Similar to the - generic version except that we know how to honor ADDR_LIMIT_32BIT. */ +/* Get an address range which is currently unmapped. */ static unsigned long arch_get_unmapped_area_1(unsigned long addr, unsigned long len, @@ -1230,13 +1229,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags) { - unsigned long limit; - - /* "32 bit" actually means 31 bit, since pointers sign extend. */ - if (current->personality & ADDR_LIMIT_32BIT) - limit = 0x80000000; - else - limit = TASK_SIZE; + unsigned long limit = TASK_SIZE; if (len > limit) return -ENOMEM; diff --git a/arch/arm64/boot/dts/rockchip/rk3576.dtsi b/arch/arm64/boot/dts/rockchip/rk3576.dtsi index 4dde954043ef..bd55bd8a67cb 100644 --- a/arch/arm64/boot/dts/rockchip/rk3576.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3576.dtsi @@ -1221,6 +1221,30 @@ gmac1_mtl_tx_setup: tx-queues-config { }; }; + ufshc: ufshc@2a2d0000 { + compatible = "rockchip,rk3576-ufshc"; + reg = <0x0 0x2a2d0000 0x0 0x10000>, + <0x0 0x2b040000 0x0 0x10000>, + <0x0 0x2601f000 0x0 0x1000>, + <0x0 0x2603c000 0x0 0x1000>, + <0x0 0x2a2e0000 0x0 0x10000>; + reg-names = "hci", "mphy", "hci_grf", "mphy_grf", "hci_apb"; + clocks = <&cru ACLK_UFS_SYS>, <&cru PCLK_USB_ROOT>, <&cru PCLK_MPHY>, + <&cru CLK_REF_UFS_CLKOUT>; + clock-names = "core", "pclk", "pclk_mphy", "ref_out"; + assigned-clocks = <&cru CLK_REF_OSC_MPHY>; + assigned-clock-parents = <&cru CLK_REF_MPHY_26M>; + interrupts = <GIC_SPI 361 IRQ_TYPE_LEVEL_HIGH>; + power-domains = <&power RK3576_PD_USB>; + pinctrl-0 = <&ufs_refclk>; + pinctrl-names = "default"; + resets = <&cru SRST_A_UFS_BIU>, <&cru SRST_A_UFS_SYS>, + <&cru SRST_A_UFS>, <&cru SRST_P_UFS_GRF>; + reset-names = "biu", "sys", "ufs", "grf"; + reset-gpios = <&gpio4 RK_PD0 GPIO_ACTIVE_LOW>; + status = "disabled"; + }; + sdmmc: mmc@2a310000 { compatible = "rockchip,rk3576-dw-mshc"; reg = <0x0 0x2a310000 0x0 0x4000>; diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index d3d243366536..231c0cd9c7b4 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -471,10 +471,8 @@ static void timer_emulate(struct arch_timer_context *ctx) trace_kvm_timer_emulate(ctx, should_fire); - if (should_fire != ctx->irq.level) { + if (should_fire != ctx->irq.level) kvm_timer_update_irq(ctx->vcpu, should_fire, ctx); - return; - } kvm_timer_update_status(ctx, should_fire); @@ -761,21 +759,6 @@ static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu, timer_irq(map->direct_ptimer), &arch_timer_irq_ops); WARN_ON_ONCE(ret); - - /* - * The virtual offset behaviour is "interesting", as it - * always applies when HCR_EL2.E2H==0, but only when - * accessed from EL1 when HCR_EL2.E2H==1. So make sure we - * track E2H when putting the HV timer in "direct" mode. - */ - if (map->direct_vtimer == vcpu_hvtimer(vcpu)) { - struct arch_timer_offset *offs = &map->direct_vtimer->offset; - - if (vcpu_el2_e2h_is_set(vcpu)) - offs->vcpu_offset = NULL; - else - offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2); - } } } @@ -976,31 +959,21 @@ void kvm_timer_sync_nested(struct kvm_vcpu *vcpu) * which allows trapping of the timer registers even with NV2. * Still, this is still worse than FEAT_NV on its own. Meh. */ - if (!vcpu_el2_e2h_is_set(vcpu)) { - if (cpus_have_final_cap(ARM64_HAS_ECV)) - return; - - /* - * A non-VHE guest hypervisor doesn't have any direct access - * to its timers: the EL2 registers trap (and the HW is - * fully emulated), while the EL0 registers access memory - * despite the access being notionally direct. Boo. - * - * We update the hardware timer registers with the - * latest value written by the guest to the VNCR page - * and let the hardware take care of the rest. - */ - write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CTL_EL0), SYS_CNTV_CTL); - write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CVAL_EL0), SYS_CNTV_CVAL); - write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CTL_EL0), SYS_CNTP_CTL); - write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0), SYS_CNTP_CVAL); - } else { + if (!cpus_have_final_cap(ARM64_HAS_ECV)) { /* * For a VHE guest hypervisor, the EL2 state is directly - * stored in the host EL1 timers, while the emulated EL0 + * stored in the host EL1 timers, while the emulated EL1 * state is stored in the VNCR page. The latter could have * been updated behind our back, and we must reset the * emulation of the timers. + * + * A non-VHE guest hypervisor doesn't have any direct access + * to its timers: the EL2 registers trap despite being + * notionally direct (we use the EL1 HW, as for VHE), while + * the EL1 registers access memory. + * + * In both cases, process the emulated timers on each guest + * exit. Boo. */ struct timer_map map; get_timer_map(vcpu, &map); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 646e806c6ca6..071a7d75be68 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2290,6 +2290,19 @@ static int __init init_subsystems(void) break; case -ENODEV: case -ENXIO: + /* + * No VGIC? No pKVM for you. + * + * Protected mode assumes that VGICv3 is present, so no point + * in trying to hobble along if vgic initialization fails. + */ + if (is_protected_kvm_enabled()) + goto out; + + /* + * Otherwise, userspace could choose to implement a GIC for its + * guest on non-cooperative hardware. + */ vgic_present = false; err = 0; break; @@ -2400,6 +2413,13 @@ static void kvm_hyp_init_symbols(void) kvm_nvhe_sym(id_aa64smfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64SMFR0_EL1); kvm_nvhe_sym(__icache_flags) = __icache_flags; kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits; + + /* + * Flush entire BSS since part of its data containing init symbols is read + * while the MMU is off. + */ + kvm_flush_dcache_to_poc(kvm_ksym_ref(__hyp_bss_start), + kvm_ksym_ref(__hyp_bss_end) - kvm_ksym_ref(__hyp_bss_start)); } static int __init kvm_hyp_init_protection(u32 hyp_va_bits) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 5c134520e180..6e12c070832f 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -91,11 +91,34 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu) *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED; } +static void flush_debug_state(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; + + hyp_vcpu->vcpu.arch.debug_owner = host_vcpu->arch.debug_owner; + + if (kvm_guest_owns_debug_regs(&hyp_vcpu->vcpu)) + hyp_vcpu->vcpu.arch.vcpu_debug_state = host_vcpu->arch.vcpu_debug_state; + else if (kvm_host_owns_debug_regs(&hyp_vcpu->vcpu)) + hyp_vcpu->vcpu.arch.external_debug_state = host_vcpu->arch.external_debug_state; +} + +static void sync_debug_state(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; + + if (kvm_guest_owns_debug_regs(&hyp_vcpu->vcpu)) + host_vcpu->arch.vcpu_debug_state = hyp_vcpu->vcpu.arch.vcpu_debug_state; + else if (kvm_host_owns_debug_regs(&hyp_vcpu->vcpu)) + host_vcpu->arch.external_debug_state = hyp_vcpu->vcpu.arch.external_debug_state; +} + static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) { struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; fpsimd_sve_flush(); + flush_debug_state(hyp_vcpu); hyp_vcpu->vcpu.arch.ctxt = host_vcpu->arch.ctxt; @@ -123,6 +146,7 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) unsigned int i; fpsimd_sve_sync(&hyp_vcpu->vcpu); + sync_debug_state(hyp_vcpu); host_vcpu->arch.ctxt = hyp_vcpu->vcpu.arch.ctxt; diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 33d2ace68665..0c9387d2f507 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -67,26 +67,27 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu) if (!tmp) return -ENOMEM; + swap(kvm->arch.nested_mmus, tmp); + /* * If we went through a realocation, adjust the MMU back-pointers in * the previously initialised kvm_pgtable structures. */ if (kvm->arch.nested_mmus != tmp) for (int i = 0; i < kvm->arch.nested_mmus_size; i++) - tmp[i].pgt->mmu = &tmp[i]; + kvm->arch.nested_mmus[i].pgt->mmu = &kvm->arch.nested_mmus[i]; for (int i = kvm->arch.nested_mmus_size; !ret && i < num_mmus; i++) - ret = init_nested_s2_mmu(kvm, &tmp[i]); + ret = init_nested_s2_mmu(kvm, &kvm->arch.nested_mmus[i]); if (ret) { for (int i = kvm->arch.nested_mmus_size; i < num_mmus; i++) - kvm_free_stage2_pgd(&tmp[i]); + kvm_free_stage2_pgd(&kvm->arch.nested_mmus[i]); return ret; } kvm->arch.nested_mmus_size = num_mmus; - kvm->arch.nested_mmus = tmp; return 0; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index f6cd1ea7fb55..82430c1e1dd0 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1452,6 +1452,16 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, return true; } +static bool access_hv_timer(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (!vcpu_el2_e2h_is_set(vcpu)) + return undef_access(vcpu, p, r); + + return access_arch_timer(vcpu, p, r); +} + static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp, s64 new, s64 cur) { @@ -3103,9 +3113,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(CNTHP_CTL_EL2, access_arch_timer, reset_val, 0), EL2_REG(CNTHP_CVAL_EL2, access_arch_timer, reset_val, 0), - { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_arch_timer }, - EL2_REG(CNTHV_CTL_EL2, access_arch_timer, reset_val, 0), - EL2_REG(CNTHV_CVAL_EL2, access_arch_timer, reset_val, 0), + { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_hv_timer }, + EL2_REG(CNTHV_CTL_EL2, access_hv_timer, reset_val, 0), + EL2_REG(CNTHV_CVAL_EL2, access_hv_timer, reset_val, 0), { SYS_DESC(SYS_CNTKCTL_EL12), access_cntkctl_el12 }, diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 1aa0cb097c9c..7b9a5ea9cad9 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -75,7 +75,7 @@ static void fsl_msi_print_chip(struct irq_data *irqd, struct seq_file *p) srs = (hwirq >> msi_data->srs_shift) & MSI_SRS_MASK; cascade_virq = msi_data->cascade_array[srs]->virq; - seq_printf(p, " fsl-msi-%d", cascade_virq); + seq_printf(p, "fsl-msi-%d", cascade_virq); } diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index 13f51a6a5bb1..4e73ef46d4b2 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -23,7 +23,6 @@ /** * struct gmap_struct - guest address space * @list: list head for the mm->context gmap list - * @crst_list: list of all crst tables used in the guest address space * @mm: pointer to the parent mm_struct * @guest_to_host: radix tree with guest to host address translation * @host_to_guest: radix tree with pointer to segment table entries @@ -35,7 +34,6 @@ * @guest_handle: protected virtual machine handle for the ultravisor * @host_to_rmap: radix tree with gmap_rmap lists * @children: list of shadow gmap structures - * @pt_list: list of all page tables used in the shadow guest address space * @shadow_lock: spinlock to protect the shadow gmap list * @parent: pointer to the parent gmap for shadow guest address spaces * @orig_asce: ASCE for which the shadow page table has been created @@ -45,7 +43,6 @@ */ struct gmap { struct list_head list; - struct list_head crst_list; struct mm_struct *mm; struct radix_tree_root guest_to_host; struct radix_tree_root host_to_guest; @@ -61,7 +58,6 @@ struct gmap { /* Additional data for shadow guest address spaces */ struct radix_tree_root host_to_rmap; struct list_head children; - struct list_head pt_list; spinlock_t shadow_lock; struct gmap *parent; unsigned long orig_asce; @@ -106,23 +102,21 @@ struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit); void gmap_remove(struct gmap *gmap); struct gmap *gmap_get(struct gmap *gmap); void gmap_put(struct gmap *gmap); +void gmap_free(struct gmap *gmap); +struct gmap *gmap_alloc(unsigned long limit); int gmap_map_segment(struct gmap *gmap, unsigned long from, unsigned long to, unsigned long len); int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); unsigned long __gmap_translate(struct gmap *, unsigned long gaddr); -unsigned long gmap_translate(struct gmap *, unsigned long gaddr); int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr); -int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags); void gmap_discard(struct gmap *, unsigned long from, unsigned long to); void __gmap_zap(struct gmap *, unsigned long gaddr); void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr); int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val); -struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, - int edat_level); -int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level); +void gmap_unshadow(struct gmap *sg); int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, int fake); int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, @@ -131,24 +125,22 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, int fake); int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, int fake); -int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, - unsigned long *pgt, int *dat_protection, int *fake); int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte); void gmap_register_pte_notifier(struct gmap_notifier *); void gmap_unregister_pte_notifier(struct gmap_notifier *); -int gmap_mprotect_notify(struct gmap *, unsigned long start, - unsigned long len, int prot); +int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits); void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4], unsigned long gaddr, unsigned long vmaddr); int s390_disable_cow_sharing(void); -void s390_unlist_old_asce(struct gmap *gmap); int s390_replace_asce(struct gmap *gmap); void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns); int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start, unsigned long end, bool interruptible); +int kvm_s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split); +unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level); /** * s390_uv_destroy_range - Destroy a range of pages in the given mm. diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 97c7c8127543..9a367866cab0 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -30,6 +30,8 @@ #define KVM_S390_ESCA_CPU_SLOTS 248 #define KVM_MAX_VCPUS 255 +#define KVM_INTERNAL_MEM_SLOTS 1 + /* * These seem to be used for allocating ->chip in the routing table, which we * don't use. 1 is as small as we can get to reduce the needed memory. If we @@ -931,12 +933,14 @@ struct sie_page2 { u8 reserved928[0x1000 - 0x928]; /* 0x0928 */ }; +struct vsie_page; + struct kvm_s390_vsie { struct mutex mutex; struct radix_tree_root addr_to_page; int page_count; int next; - struct page *pages[KVM_MAX_VCPUS]; + struct vsie_page *pages[KVM_MAX_VCPUS]; }; struct kvm_s390_gisa_iam { diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index a3b51056a177..3ca5af4cfe43 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -420,9 +420,10 @@ void setup_protection_map(void); #define PGSTE_HC_BIT 0x0020000000000000UL #define PGSTE_GR_BIT 0x0004000000000000UL #define PGSTE_GC_BIT 0x0002000000000000UL -#define PGSTE_UC_BIT 0x0000800000000000UL /* user dirty (migration) */ -#define PGSTE_IN_BIT 0x0000400000000000UL /* IPTE notify bit */ -#define PGSTE_VSIE_BIT 0x0000200000000000UL /* ref'd in a shadow table */ +#define PGSTE_ST2_MASK 0x0000ffff00000000UL +#define PGSTE_UC_BIT 0x0000000000008000UL /* user dirty (migration) */ +#define PGSTE_IN_BIT 0x0000000000004000UL /* IPTE notify bit */ +#define PGSTE_VSIE_BIT 0x0000000000002000UL /* ref'd in a shadow table */ /* Guest Page State used for virtualization */ #define _PGSTE_GPS_ZERO 0x0000000080000000UL @@ -2007,4 +2008,18 @@ extern void s390_reset_cmma(struct mm_struct *mm); #define pmd_pgtable(pmd) \ ((pgtable_t)__va(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE)) +static inline unsigned long gmap_pgste_get_pgt_addr(unsigned long *pgt) +{ + unsigned long *pgstes, res; + + pgstes = pgt + _PAGE_ENTRIES; + + res = (pgstes[0] & PGSTE_ST2_MASK) << 16; + res |= pgstes[1] & PGSTE_ST2_MASK; + res |= (pgstes[2] & PGSTE_ST2_MASK) >> 16; + res |= (pgstes[3] & PGSTE_ST2_MASK) >> 32; + + return res; +} + #endif /* _S390_PAGE_H */ diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index dc332609f2c3..b11f5b6d0bd1 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -628,12 +628,12 @@ static inline int is_prot_virt_host(void) } int uv_pin_shared(unsigned long paddr); -int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb); -int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr); int uv_destroy_folio(struct folio *folio); int uv_destroy_pte(pte_t pte); int uv_convert_from_secure_pte(pte_t pte); -int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr); +int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb); +int uv_convert_from_secure(unsigned long paddr); +int uv_convert_from_secure_folio(struct folio *folio); void setup_uv(void); diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 6f9654a191ad..9f05df2da2f7 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -19,19 +19,6 @@ #include <asm/sections.h> #include <asm/uv.h> -#if !IS_ENABLED(CONFIG_KVM) -unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) -{ - return 0; -} - -int gmap_fault(struct gmap *gmap, unsigned long gaddr, - unsigned int fault_flags) -{ - return 0; -} -#endif - /* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */ int __bootdata_preserved(prot_virt_guest); EXPORT_SYMBOL(prot_virt_guest); @@ -159,6 +146,7 @@ int uv_destroy_folio(struct folio *folio) folio_put(folio); return rc; } +EXPORT_SYMBOL(uv_destroy_folio); /* * The present PTE still indirectly holds a folio reference through the mapping. @@ -175,7 +163,7 @@ int uv_destroy_pte(pte_t pte) * * @paddr: Absolute host address of page to be exported */ -static int uv_convert_from_secure(unsigned long paddr) +int uv_convert_from_secure(unsigned long paddr) { struct uv_cb_cfs uvcb = { .header.cmd = UVC_CMD_CONV_FROM_SEC_STOR, @@ -187,11 +175,12 @@ static int uv_convert_from_secure(unsigned long paddr) return -EINVAL; return 0; } +EXPORT_SYMBOL_GPL(uv_convert_from_secure); /* * The caller must already hold a reference to the folio. */ -static int uv_convert_from_secure_folio(struct folio *folio) +int uv_convert_from_secure_folio(struct folio *folio) { int rc; @@ -206,6 +195,7 @@ static int uv_convert_from_secure_folio(struct folio *folio) folio_put(folio); return rc; } +EXPORT_SYMBOL_GPL(uv_convert_from_secure_folio); /* * The present PTE still indirectly holds a folio reference through the mapping. @@ -237,13 +227,33 @@ static int expected_folio_refs(struct folio *folio) return res; } -static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) +/** + * make_folio_secure() - make a folio secure + * @folio: the folio to make secure + * @uvcb: the uvcb that describes the UVC to be used + * + * The folio @folio will be made secure if possible, @uvcb will be passed + * as-is to the UVC. + * + * Return: 0 on success; + * -EBUSY if the folio is in writeback or has too many references; + * -E2BIG if the folio is large; + * -EAGAIN if the UVC needs to be attempted again; + * -ENXIO if the address is not mapped; + * -EINVAL if the UVC failed for other reasons. + * + * Context: The caller must hold exactly one extra reference on the folio + * (it's the same logic as split_folio()) + */ +int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) { int expected, cc = 0; + if (folio_test_large(folio)) + return -E2BIG; if (folio_test_writeback(folio)) - return -EAGAIN; - expected = expected_folio_refs(folio); + return -EBUSY; + expected = expected_folio_refs(folio) + 1; if (!folio_ref_freeze(folio, expected)) return -EBUSY; set_bit(PG_arch_1, &folio->flags); @@ -267,251 +277,7 @@ static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) return -EAGAIN; return uvcb->rc == 0x10a ? -ENXIO : -EINVAL; } - -/** - * should_export_before_import - Determine whether an export is needed - * before an import-like operation - * @uvcb: the Ultravisor control block of the UVC to be performed - * @mm: the mm of the process - * - * Returns whether an export is needed before every import-like operation. - * This is needed for shared pages, which don't trigger a secure storage - * exception when accessed from a different guest. - * - * Although considered as one, the Unpin Page UVC is not an actual import, - * so it is not affected. - * - * No export is needed also when there is only one protected VM, because the - * page cannot belong to the wrong VM in that case (there is no "other VM" - * it can belong to). - * - * Return: true if an export is needed before every import, otherwise false. - */ -static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) -{ - /* - * The misc feature indicates, among other things, that importing a - * shared page from a different protected VM will automatically also - * transfer its ownership. - */ - if (uv_has_feature(BIT_UV_FEAT_MISC)) - return false; - if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) - return false; - return atomic_read(&mm->context.protected_count) > 1; -} - -/* - * Drain LRU caches: the local one on first invocation and the ones of all - * CPUs on successive invocations. Returns "true" on the first invocation. - */ -static bool drain_lru(bool *drain_lru_called) -{ - /* - * If we have tried a local drain and the folio refcount - * still does not match our expected safe value, try with a - * system wide drain. This is needed if the pagevecs holding - * the page are on a different CPU. - */ - if (*drain_lru_called) { - lru_add_drain_all(); - /* We give up here, don't retry immediately. */ - return false; - } - /* - * We are here if the folio refcount does not match the - * expected safe value. The main culprits are usually - * pagevecs. With lru_add_drain() we drain the pagevecs - * on the local CPU so that hopefully the refcount will - * reach the expected safe value. - */ - lru_add_drain(); - *drain_lru_called = true; - /* The caller should try again immediately */ - return true; -} - -/* - * Requests the Ultravisor to make a page accessible to a guest. - * If it's brought in the first time, it will be cleared. If - * it has been exported before, it will be decrypted and integrity - * checked. - */ -int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) -{ - struct vm_area_struct *vma; - bool drain_lru_called = false; - spinlock_t *ptelock; - unsigned long uaddr; - struct folio *folio; - pte_t *ptep; - int rc; - -again: - rc = -EFAULT; - mmap_read_lock(gmap->mm); - - uaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(uaddr)) - goto out; - vma = vma_lookup(gmap->mm, uaddr); - if (!vma) - goto out; - /* - * Secure pages cannot be huge and userspace should not combine both. - * In case userspace does it anyway this will result in an -EFAULT for - * the unpack. The guest is thus never reaching secure mode. If - * userspace is playing dirty tricky with mapping huge pages later - * on this will result in a segmentation fault. - */ - if (is_vm_hugetlb_page(vma)) - goto out; - - rc = -ENXIO; - ptep = get_locked_pte(gmap->mm, uaddr, &ptelock); - if (!ptep) - goto out; - if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) { - folio = page_folio(pte_page(*ptep)); - rc = -EAGAIN; - if (folio_test_large(folio)) { - rc = -E2BIG; - } else if (folio_trylock(folio)) { - if (should_export_before_import(uvcb, gmap->mm)) - uv_convert_from_secure(PFN_PHYS(folio_pfn(folio))); - rc = make_folio_secure(folio, uvcb); - folio_unlock(folio); - } - - /* - * Once we drop the PTL, the folio may get unmapped and - * freed immediately. We need a temporary reference. - */ - if (rc == -EAGAIN || rc == -E2BIG) - folio_get(folio); - } - pte_unmap_unlock(ptep, ptelock); -out: - mmap_read_unlock(gmap->mm); - - switch (rc) { - case -E2BIG: - folio_lock(folio); - rc = split_folio(folio); - folio_unlock(folio); - folio_put(folio); - - switch (rc) { - case 0: - /* Splitting succeeded, try again immediately. */ - goto again; - case -EAGAIN: - /* Additional folio references. */ - if (drain_lru(&drain_lru_called)) - goto again; - return -EAGAIN; - case -EBUSY: - /* Unexpected race. */ - return -EAGAIN; - } - WARN_ON_ONCE(1); - return -ENXIO; - case -EAGAIN: - /* - * If we are here because the UVC returned busy or partial - * completion, this is just a useless check, but it is safe. - */ - folio_wait_writeback(folio); - folio_put(folio); - return -EAGAIN; - case -EBUSY: - /* Additional folio references. */ - if (drain_lru(&drain_lru_called)) - goto again; - return -EAGAIN; - case -ENXIO: - if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE)) - return -EFAULT; - return -EAGAIN; - } - return rc; -} -EXPORT_SYMBOL_GPL(gmap_make_secure); - -int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr) -{ - struct uv_cb_cts uvcb = { - .header.cmd = UVC_CMD_CONV_TO_SEC_STOR, - .header.len = sizeof(uvcb), - .guest_handle = gmap->guest_handle, - .gaddr = gaddr, - }; - - return gmap_make_secure(gmap, gaddr, &uvcb); -} -EXPORT_SYMBOL_GPL(gmap_convert_to_secure); - -/** - * gmap_destroy_page - Destroy a guest page. - * @gmap: the gmap of the guest - * @gaddr: the guest address to destroy - * - * An attempt will be made to destroy the given guest page. If the attempt - * fails, an attempt is made to export the page. If both attempts fail, an - * appropriate error is returned. - */ -int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr) -{ - struct vm_area_struct *vma; - struct folio_walk fw; - unsigned long uaddr; - struct folio *folio; - int rc; - - rc = -EFAULT; - mmap_read_lock(gmap->mm); - - uaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(uaddr)) - goto out; - vma = vma_lookup(gmap->mm, uaddr); - if (!vma) - goto out; - /* - * Huge pages should not be able to become secure - */ - if (is_vm_hugetlb_page(vma)) - goto out; - - rc = 0; - folio = folio_walk_start(&fw, vma, uaddr, 0); - if (!folio) - goto out; - /* - * See gmap_make_secure(): large folios cannot be secure. Small - * folio implies FW_LEVEL_PTE. - */ - if (folio_test_large(folio) || !pte_write(fw.pte)) - goto out_walk_end; - rc = uv_destroy_folio(folio); - /* - * Fault handlers can race; it is possible that two CPUs will fault - * on the same secure page. One CPU can destroy the page, reboot, - * re-enter secure mode and import it, while the second CPU was - * stuck at the beginning of the handler. At some point the second - * CPU will be able to progress, and it will not be able to destroy - * the page. In that case we do not want to terminate the process, - * we instead try to export the page. - */ - if (rc) - rc = uv_convert_from_secure_folio(folio); -out_walk_end: - folio_walk_end(&fw, vma); -out: - mmap_read_unlock(gmap->mm); - return rc; -} -EXPORT_SYMBOL_GPL(gmap_destroy_page); +EXPORT_SYMBOL_GPL(make_folio_secure); /* * To be called with the folio locked or with an extra reference! This will diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 02217fb4ae10..f0ffe874adc2 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -8,7 +8,7 @@ include $(srctree)/virt/kvm/Makefile.kvm ccflags-y := -Ivirt/kvm -Iarch/s390/kvm kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o -kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o +kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap.o gmap-vsie.o kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 9816b0060fbe..f6fded15633a 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -16,6 +16,7 @@ #include <asm/gmap.h> #include <asm/dat-bits.h> #include "kvm-s390.h" +#include "gmap.h" #include "gaccess.h" /* @@ -1392,6 +1393,44 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, return 0; } +/** + * shadow_pgt_lookup() - find a shadow page table + * @sg: pointer to the shadow guest address space structure + * @saddr: the address in the shadow aguest address space + * @pgt: parent gmap address of the page table to get shadowed + * @dat_protection: if the pgtable is marked as protected by dat + * @fake: pgt references contiguous guest memory block, not a pgtable + * + * Returns 0 if the shadow page table was found and -EAGAIN if the page + * table was not found. + * + * Called with sg->mm->mmap_lock in read. + */ +static int shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, unsigned long *pgt, + int *dat_protection, int *fake) +{ + unsigned long pt_index; + unsigned long *table; + struct page *page; + int rc; + + spin_lock(&sg->guest_table_lock); + table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */ + if (table && !(*table & _SEGMENT_ENTRY_INVALID)) { + /* Shadow page tables are full pages (pte+pgste) */ + page = pfn_to_page(*table >> PAGE_SHIFT); + pt_index = gmap_pgste_get_pgt_addr(page_to_virt(page)); + *pgt = pt_index & ~GMAP_SHADOW_FAKE_TABLE; + *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT); + *fake = !!(pt_index & GMAP_SHADOW_FAKE_TABLE); + rc = 0; + } else { + rc = -EAGAIN; + } + spin_unlock(&sg->guest_table_lock); + return rc; +} + /** * kvm_s390_shadow_fault - handle fault on a shadow page table * @vcpu: virtual cpu @@ -1415,6 +1454,9 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, int dat_protection, fake; int rc; + if (KVM_BUG_ON(!gmap_is_shadow(sg), vcpu->kvm)) + return -EFAULT; + mmap_read_lock(sg->mm); /* * We don't want any guest-2 tables to change - so the parent @@ -1423,7 +1465,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, */ ipte_lock(vcpu->kvm); - rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake); + rc = shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake); if (rc) rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection, &fake); diff --git a/arch/s390/kvm/gmap-vsie.c b/arch/s390/kvm/gmap-vsie.c new file mode 100644 index 000000000000..a6d1dbb04c97 --- /dev/null +++ b/arch/s390/kvm/gmap-vsie.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Guest memory management for KVM/s390 nested VMs. + * + * Copyright IBM Corp. 2008, 2020, 2024 + * + * Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com> + * Martin Schwidefsky <schwidefsky@de.ibm.com> + * David Hildenbrand <david@redhat.com> + * Janosch Frank <frankja@linux.vnet.ibm.com> + */ + +#include <linux/compiler.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/pgtable.h> +#include <linux/pagemap.h> +#include <linux/mman.h> + +#include <asm/lowcore.h> +#include <asm/gmap.h> +#include <asm/uv.h> + +#include "kvm-s390.h" +#include "gmap.h" + +/** + * gmap_find_shadow - find a specific asce in the list of shadow tables + * @parent: pointer to the parent gmap + * @asce: ASCE for which the shadow table is created + * @edat_level: edat level to be used for the shadow translation + * + * Returns the pointer to a gmap if a shadow table with the given asce is + * already available, ERR_PTR(-EAGAIN) if another one is just being created, + * otherwise NULL + * + * Context: Called with parent->shadow_lock held + */ +static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce, int edat_level) +{ + struct gmap *sg; + + lockdep_assert_held(&parent->shadow_lock); + list_for_each_entry(sg, &parent->children, list) { + if (!gmap_shadow_valid(sg, asce, edat_level)) + continue; + if (!sg->initialized) + return ERR_PTR(-EAGAIN); + refcount_inc(&sg->ref_count); + return sg; + } + return NULL; +} + +/** + * gmap_shadow - create/find a shadow guest address space + * @parent: pointer to the parent gmap + * @asce: ASCE for which the shadow table is created + * @edat_level: edat level to be used for the shadow translation + * + * The pages of the top level page table referred by the asce parameter + * will be set to read-only and marked in the PGSTEs of the kvm process. + * The shadow table will be removed automatically on any change to the + * PTE mapping for the source table. + * + * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory, + * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the + * parent gmap table could not be protected. + */ +struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level) +{ + struct gmap *sg, *new; + unsigned long limit; + int rc; + + if (KVM_BUG_ON(parent->mm->context.allow_gmap_hpage_1m, (struct kvm *)parent->private) || + KVM_BUG_ON(gmap_is_shadow(parent), (struct kvm *)parent->private)) + return ERR_PTR(-EFAULT); + spin_lock(&parent->shadow_lock); + sg = gmap_find_shadow(parent, asce, edat_level); + spin_unlock(&parent->shadow_lock); + if (sg) + return sg; + /* Create a new shadow gmap */ + limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11)); + if (asce & _ASCE_REAL_SPACE) + limit = -1UL; + new = gmap_alloc(limit); + if (!new) + return ERR_PTR(-ENOMEM); + new->mm = parent->mm; + new->parent = gmap_get(parent); + new->private = parent->private; + new->orig_asce = asce; + new->edat_level = edat_level; + new->initialized = false; + spin_lock(&parent->shadow_lock); + /* Recheck if another CPU created the same shadow */ + sg = gmap_find_shadow(parent, asce, edat_level); + if (sg) { + spin_unlock(&parent->shadow_lock); + gmap_free(new); + return sg; + } + if (asce & _ASCE_REAL_SPACE) { + /* only allow one real-space gmap shadow */ + list_for_each_entry(sg, &parent->children, list) { + if (sg->orig_asce & _ASCE_REAL_SPACE) { + spin_lock(&sg->guest_table_lock); + gmap_unshadow(sg); + spin_unlock(&sg->guest_table_lock); + list_del(&sg->list); + gmap_put(sg); + break; + } + } + } + refcount_set(&new->ref_count, 2); + list_add(&new->list, &parent->children); + if (asce & _ASCE_REAL_SPACE) { + /* nothing to protect, return right away */ + new->initialized = true; + spin_unlock(&parent->shadow_lock); + return new; + } + spin_unlock(&parent->shadow_lock); + /* protect after insertion, so it will get properly invalidated */ + mmap_read_lock(parent->mm); + rc = __kvm_s390_mprotect_many(parent, asce & _ASCE_ORIGIN, + ((asce & _ASCE_TABLE_LENGTH) + 1), + PROT_READ, GMAP_NOTIFY_SHADOW); + mmap_read_unlock(parent->mm); + spin_lock(&parent->shadow_lock); + new->initialized = true; + if (rc) { + list_del(&new->list); + gmap_free(new); + new = ERR_PTR(rc); + } + spin_unlock(&parent->shadow_lock); + return new; +} diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c new file mode 100644 index 000000000000..02adf151d4de --- /dev/null +++ b/arch/s390/kvm/gmap.c @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Guest memory management for KVM/s390 + * + * Copyright IBM Corp. 2008, 2020, 2024 + * + * Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com> + * Martin Schwidefsky <schwidefsky@de.ibm.com> + * David Hildenbrand <david@redhat.com> + * Janosch Frank <frankja@linux.vnet.ibm.com> + */ + +#include <linux/compiler.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/pgtable.h> +#include <linux/pagemap.h> + +#include <asm/lowcore.h> +#include <asm/gmap.h> +#include <asm/uv.h> + +#include "gmap.h" + +/** + * should_export_before_import - Determine whether an export is needed + * before an import-like operation + * @uvcb: the Ultravisor control block of the UVC to be performed + * @mm: the mm of the process + * + * Returns whether an export is needed before every import-like operation. + * This is needed for shared pages, which don't trigger a secure storage + * exception when accessed from a different guest. + * + * Although considered as one, the Unpin Page UVC is not an actual import, + * so it is not affected. + * + * No export is needed also when there is only one protected VM, because the + * page cannot belong to the wrong VM in that case (there is no "other VM" + * it can belong to). + * + * Return: true if an export is needed before every import, otherwise false. + */ +static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) +{ + /* + * The misc feature indicates, among other things, that importing a + * shared page from a different protected VM will automatically also + * transfer its ownership. + */ + if (uv_has_feature(BIT_UV_FEAT_MISC)) + return false; + if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) + return false; + return atomic_read(&mm->context.protected_count) > 1; +} + +static int __gmap_make_secure(struct gmap *gmap, struct page *page, void *uvcb) +{ + struct folio *folio = page_folio(page); + int rc; + + /* + * Secure pages cannot be huge and userspace should not combine both. + * In case userspace does it anyway this will result in an -EFAULT for + * the unpack. The guest is thus never reaching secure mode. + * If userspace plays dirty tricks and decides to map huge pages at a + * later point in time, it will receive a segmentation fault or + * KVM_RUN will return -EFAULT. + */ + if (folio_test_hugetlb(folio)) + return -EFAULT; + if (folio_test_large(folio)) { + mmap_read_unlock(gmap->mm); + rc = kvm_s390_wiggle_split_folio(gmap->mm, folio, true); + mmap_read_lock(gmap->mm); + if (rc) + return rc; + folio = page_folio(page); + } + + if (!folio_trylock(folio)) + return -EAGAIN; + if (should_export_before_import(uvcb, gmap->mm)) + uv_convert_from_secure(folio_to_phys(folio)); + rc = make_folio_secure(folio, uvcb); + folio_unlock(folio); + + /* + * In theory a race is possible and the folio might have become + * large again before the folio_trylock() above. In that case, no + * action is performed and -EAGAIN is returned; the callers will + * have to try again later. + * In most cases this implies running the VM again, getting the same + * exception again, and make another attempt in this function. + * This is expected to happen extremely rarely. + */ + if (rc == -E2BIG) + return -EAGAIN; + /* The folio has too many references, try to shake some off */ + if (rc == -EBUSY) { + mmap_read_unlock(gmap->mm); + kvm_s390_wiggle_split_folio(gmap->mm, folio, false); + mmap_read_lock(gmap->mm); + return -EAGAIN; + } + + return rc; +} + +/** + * gmap_make_secure() - make one guest page secure + * @gmap: the guest gmap + * @gaddr: the guest address that needs to be made secure + * @uvcb: the UVCB specifying which operation needs to be performed + * + * Context: needs to be called with kvm->srcu held. + * Return: 0 on success, < 0 in case of error (see __gmap_make_secure()). + */ +int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) +{ + struct kvm *kvm = gmap->private; + struct page *page; + int rc = 0; + + lockdep_assert_held(&kvm->srcu); + + page = gfn_to_page(kvm, gpa_to_gfn(gaddr)); + mmap_read_lock(gmap->mm); + if (page) + rc = __gmap_make_secure(gmap, page, uvcb); + kvm_release_page_clean(page); + mmap_read_unlock(gmap->mm); + + return rc; +} + +int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr) +{ + struct uv_cb_cts uvcb = { + .header.cmd = UVC_CMD_CONV_TO_SEC_STOR, + .header.len = sizeof(uvcb), + .guest_handle = gmap->guest_handle, + .gaddr = gaddr, + }; + + return gmap_make_secure(gmap, gaddr, &uvcb); +} + +/** + * __gmap_destroy_page() - Destroy a guest page. + * @gmap: the gmap of the guest + * @page: the page to destroy + * + * An attempt will be made to destroy the given guest page. If the attempt + * fails, an attempt is made to export the page. If both attempts fail, an + * appropriate error is returned. + * + * Context: must be called holding the mm lock for gmap->mm + */ +static int __gmap_destroy_page(struct gmap *gmap, struct page *page) +{ + struct folio *folio = page_folio(page); + int rc; + + /* + * See gmap_make_secure(): large folios cannot be secure. Small + * folio implies FW_LEVEL_PTE. + */ + if (folio_test_large(folio)) + return -EFAULT; + + rc = uv_destroy_folio(folio); + /* + * Fault handlers can race; it is possible that two CPUs will fault + * on the same secure page. One CPU can destroy the page, reboot, + * re-enter secure mode and import it, while the second CPU was + * stuck at the beginning of the handler. At some point the second + * CPU will be able to progress, and it will not be able to destroy + * the page. In that case we do not want to terminate the process, + * we instead try to export the page. + */ + if (rc) + rc = uv_convert_from_secure_folio(folio); + + return rc; +} + +/** + * gmap_destroy_page() - Destroy a guest page. + * @gmap: the gmap of the guest + * @gaddr: the guest address to destroy + * + * An attempt will be made to destroy the given guest page. If the attempt + * fails, an attempt is made to export the page. If both attempts fail, an + * appropriate error is returned. + * + * Context: may sleep. + */ +int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr) +{ + struct page *page; + int rc = 0; + + mmap_read_lock(gmap->mm); + page = gfn_to_page(gmap->private, gpa_to_gfn(gaddr)); + if (page) + rc = __gmap_destroy_page(gmap, page); + kvm_release_page_clean(page); + mmap_read_unlock(gmap->mm); + return rc; +} diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h new file mode 100644 index 000000000000..c8f031c9ea5f --- /dev/null +++ b/arch/s390/kvm/gmap.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * KVM guest address space mapping code + * + * Copyright IBM Corp. 2007, 2016, 2025 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + * Claudio Imbrenda <imbrenda@linux.ibm.com> + */ + +#ifndef ARCH_KVM_S390_GMAP_H +#define ARCH_KVM_S390_GMAP_H + +#define GMAP_SHADOW_FAKE_TABLE 1ULL + +int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb); +int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr); +int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr); +struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level); + +/** + * gmap_shadow_valid - check if a shadow guest address space matches the + * given properties and is still valid + * @sg: pointer to the shadow guest address space structure + * @asce: ASCE for which the shadow table is requested + * @edat_level: edat level to be used for the shadow translation + * + * Returns 1 if the gmap shadow is still valid and matches the given + * properties, the caller can continue using it. Returns 0 otherwise, the + * caller has to request a new shadow gmap in this case. + * + */ +static inline int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level) +{ + if (sg->removed) + return 0; + return sg->orig_asce == asce && sg->edat_level == edat_level; +} + +#endif diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 5bbaadf75dc6..610dd44a948b 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -21,6 +21,7 @@ #include "gaccess.h" #include "trace.h" #include "trace-s390.h" +#include "gmap.h" u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu) { @@ -367,7 +368,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) reg2, &srcaddr, GACC_FETCH, 0); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); - rc = gmap_fault(vcpu->arch.gmap, srcaddr, 0); + rc = kvm_s390_handle_dat_fault(vcpu, srcaddr, 0); if (rc != 0) return rc; @@ -376,7 +377,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) reg1, &dstaddr, GACC_STORE, 0); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); - rc = gmap_fault(vcpu->arch.gmap, dstaddr, FAULT_FLAG_WRITE); + rc = kvm_s390_handle_dat_fault(vcpu, dstaddr, FOLL_WRITE); if (rc != 0) return rc; @@ -549,7 +550,7 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu) * If the unpin did not succeed, the guest will exit again for the UVC * and we will retry the unpin. */ - if (rc == -EINVAL) + if (rc == -EINVAL || rc == -ENXIO) return 0; /* * If we got -EAGAIN here, we simply return it. It will eventually diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index d4f031e086fc..07ff0e10cb7f 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2893,7 +2893,8 @@ int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { - u64 uaddr; + u64 uaddr_s, uaddr_i; + int idx; switch (ue->type) { /* we store the userspace addresses instead of the guest addresses */ @@ -2901,14 +2902,16 @@ int kvm_set_routing_entry(struct kvm *kvm, if (kvm_is_ucontrol(kvm)) return -EINVAL; e->set = set_adapter_int; - uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.summary_addr); - if (uaddr == -EFAULT) - return -EFAULT; - e->adapter.summary_addr = uaddr; - uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.ind_addr); - if (uaddr == -EFAULT) + + idx = srcu_read_lock(&kvm->srcu); + uaddr_s = gpa_to_hva(kvm, ue->u.adapter.summary_addr); + uaddr_i = gpa_to_hva(kvm, ue->u.adapter.ind_addr); + srcu_read_unlock(&kvm->srcu, idx); + + if (kvm_is_error_hva(uaddr_s) || kvm_is_error_hva(uaddr_i)) return -EFAULT; - e->adapter.ind_addr = uaddr; + e->adapter.summary_addr = uaddr_s; + e->adapter.ind_addr = uaddr_i; e->adapter.summary_offset = ue->u.adapter.summary_offset; e->adapter.ind_offset = ue->u.adapter.ind_offset; e->adapter.adapter_id = ue->u.adapter.adapter_id; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d8080c27d45b..ebecb96bacce 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -50,6 +50,7 @@ #include "kvm-s390.h" #include "gaccess.h" #include "pci.h" +#include "gmap.h" #define CREATE_TRACE_POINTS #include "trace.h" @@ -3428,8 +3429,20 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) VM_EVENT(kvm, 3, "vm created with type %lu", type); if (type & KVM_VM_S390_UCONTROL) { + struct kvm_userspace_memory_region2 fake_memslot = { + .slot = KVM_S390_UCONTROL_MEMSLOT, + .guest_phys_addr = 0, + .userspace_addr = 0, + .memory_size = ALIGN_DOWN(TASK_SIZE, _SEGMENT_SIZE), + .flags = 0, + }; + kvm->arch.gmap = NULL; kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; + /* one flat fake memslot covering the whole address-space */ + mutex_lock(&kvm->slots_lock); + KVM_BUG_ON(kvm_set_internal_memslot(kvm, &fake_memslot), kvm); + mutex_unlock(&kvm->slots_lock); } else { if (sclp.hamax == U64_MAX) kvm->arch.mem_limit = TASK_SIZE_MAX; @@ -4498,6 +4511,75 @@ static bool ibs_enabled(struct kvm_vcpu *vcpu) return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS); } +static int __kvm_s390_fixup_fault_sync(struct gmap *gmap, gpa_t gaddr, unsigned int flags) +{ + struct kvm *kvm = gmap->private; + gfn_t gfn = gpa_to_gfn(gaddr); + bool unlocked; + hva_t vmaddr; + gpa_t tmp; + int rc; + + if (kvm_is_ucontrol(kvm)) { + tmp = __gmap_translate(gmap, gaddr); + gfn = gpa_to_gfn(tmp); + } + + vmaddr = gfn_to_hva(kvm, gfn); + rc = fixup_user_fault(gmap->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); + if (!rc) + rc = __gmap_link(gmap, gaddr, vmaddr); + return rc; +} + +/** + * __kvm_s390_mprotect_many() - Apply specified protection to guest pages + * @gmap: the gmap of the guest + * @gpa: the starting guest address + * @npages: how many pages to protect + * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE + * @bits: pgste notification bits to set + * + * Returns: 0 in case of success, < 0 in case of error - see gmap_protect_one() + * + * Context: kvm->srcu and gmap->mm need to be held in read mode + */ +int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot, + unsigned long bits) +{ + unsigned int fault_flag = (prot & PROT_WRITE) ? FAULT_FLAG_WRITE : 0; + gpa_t end = gpa + npages * PAGE_SIZE; + int rc; + + for (; gpa < end; gpa = ALIGN(gpa + 1, rc)) { + rc = gmap_protect_one(gmap, gpa, prot, bits); + if (rc == -EAGAIN) { + __kvm_s390_fixup_fault_sync(gmap, gpa, fault_flag); + rc = gmap_protect_one(gmap, gpa, prot, bits); + } + if (rc < 0) + return rc; + } + + return 0; +} + +static int kvm_s390_mprotect_notify_prefix(struct kvm_vcpu *vcpu) +{ + gpa_t gaddr = kvm_s390_get_prefix(vcpu); + int idx, rc; + + idx = srcu_read_lock(&vcpu->kvm->srcu); + mmap_read_lock(vcpu->arch.gmap->mm); + + rc = __kvm_s390_mprotect_many(vcpu->arch.gmap, gaddr, 2, PROT_WRITE, GMAP_NOTIFY_MPROT); + + mmap_read_unlock(vcpu->arch.gmap->mm); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + + return rc; +} + static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) { retry: @@ -4513,9 +4595,8 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) */ if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) { int rc; - rc = gmap_mprotect_notify(vcpu->arch.gmap, - kvm_s390_get_prefix(vcpu), - PAGE_SIZE * 2, PROT_WRITE); + + rc = kvm_s390_mprotect_notify_prefix(vcpu); if (rc) { kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu); return rc; @@ -4766,11 +4847,111 @@ static int vcpu_post_run_addressing_exception(struct kvm_vcpu *vcpu) return kvm_s390_inject_prog_irq(vcpu, &pgm_info); } +static void kvm_s390_assert_primary_as(struct kvm_vcpu *vcpu) +{ + KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm, + "Unexpected program interrupt 0x%x, TEID 0x%016lx", + current->thread.gmap_int_code, current->thread.gmap_teid.val); +} + +/* + * __kvm_s390_handle_dat_fault() - handle a dat fault for the gmap of a vcpu + * @vcpu: the vCPU whose gmap is to be fixed up + * @gfn: the guest frame number used for memslots (including fake memslots) + * @gaddr: the gmap address, does not have to match @gfn for ucontrol gmaps + * @flags: FOLL_* flags + * + * Return: 0 on success, < 0 in case of error. + * Context: The mm lock must not be held before calling. May sleep. + */ +int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags) +{ + struct kvm_memory_slot *slot; + unsigned int fault_flags; + bool writable, unlocked; + unsigned long vmaddr; + struct page *page; + kvm_pfn_t pfn; + int rc; + + slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + if (!slot || slot->flags & KVM_MEMSLOT_INVALID) + return vcpu_post_run_addressing_exception(vcpu); + + fault_flags = flags & FOLL_WRITE ? FAULT_FLAG_WRITE : 0; + if (vcpu->arch.gmap->pfault_enabled) + flags |= FOLL_NOWAIT; + vmaddr = __gfn_to_hva_memslot(slot, gfn); + +try_again: + pfn = __kvm_faultin_pfn(slot, gfn, flags, &writable, &page); + + /* Access outside memory, inject addressing exception */ + if (is_noslot_pfn(pfn)) + return vcpu_post_run_addressing_exception(vcpu); + /* Signal pending: try again */ + if (pfn == KVM_PFN_ERR_SIGPENDING) + return -EAGAIN; + + /* Needs I/O, try to setup async pfault (only possible with FOLL_NOWAIT) */ + if (pfn == KVM_PFN_ERR_NEEDS_IO) { + trace_kvm_s390_major_guest_pfault(vcpu); + if (kvm_arch_setup_async_pf(vcpu)) + return 0; + vcpu->stat.pfault_sync++; + /* Could not setup async pfault, try again synchronously */ + flags &= ~FOLL_NOWAIT; + goto try_again; + } + /* Any other error */ + if (is_error_pfn(pfn)) + return -EFAULT; + + /* Success */ + mmap_read_lock(vcpu->arch.gmap->mm); + /* Mark the userspace PTEs as young and/or dirty, to avoid page fault loops */ + rc = fixup_user_fault(vcpu->arch.gmap->mm, vmaddr, fault_flags, &unlocked); + if (!rc) + rc = __gmap_link(vcpu->arch.gmap, gaddr, vmaddr); + scoped_guard(spinlock, &vcpu->kvm->mmu_lock) { + kvm_release_faultin_page(vcpu->kvm, page, false, writable); + } + mmap_read_unlock(vcpu->arch.gmap->mm); + return rc; +} + +static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int flags) +{ + unsigned long gaddr_tmp; + gfn_t gfn; + + gfn = gpa_to_gfn(gaddr); + if (kvm_is_ucontrol(vcpu->kvm)) { + /* + * This translates the per-vCPU guest address into a + * fake guest address, which can then be used with the + * fake memslots that are identity mapping userspace. + * This allows ucontrol VMs to use the normal fault + * resolution path, like normal VMs. + */ + mmap_read_lock(vcpu->arch.gmap->mm); + gaddr_tmp = __gmap_translate(vcpu->arch.gmap, gaddr); + mmap_read_unlock(vcpu->arch.gmap->mm); + if (gaddr_tmp == -EFAULT) { + vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; + vcpu->run->s390_ucontrol.trans_exc_code = gaddr; + vcpu->run->s390_ucontrol.pgm_code = PGM_SEGMENT_TRANSLATION; + return -EREMOTE; + } + gfn = gpa_to_gfn(gaddr_tmp); + } + return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, flags); +} + static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) { unsigned int flags = 0; unsigned long gaddr; - int rc = 0; gaddr = current->thread.gmap_teid.addr * PAGE_SIZE; if (kvm_s390_cur_gmap_fault_is_write()) @@ -4781,9 +4962,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) vcpu->stat.exit_null++; break; case PGM_NON_SECURE_STORAGE_ACCESS: - KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm, - "Unexpected program interrupt 0x%x, TEID 0x%016lx", - current->thread.gmap_int_code, current->thread.gmap_teid.val); + kvm_s390_assert_primary_as(vcpu); /* * This is normal operation; a page belonging to a protected * guest has not been imported yet. Try to import the page into @@ -4794,9 +4973,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) break; case PGM_SECURE_STORAGE_ACCESS: case PGM_SECURE_STORAGE_VIOLATION: - KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm, - "Unexpected program interrupt 0x%x, TEID 0x%016lx", - current->thread.gmap_int_code, current->thread.gmap_teid.val); + kvm_s390_assert_primary_as(vcpu); /* * This can happen after a reboot with asynchronous teardown; * the new guest (normal or protected) will run on top of the @@ -4825,40 +5002,15 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) case PGM_REGION_FIRST_TRANS: case PGM_REGION_SECOND_TRANS: case PGM_REGION_THIRD_TRANS: - KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm, - "Unexpected program interrupt 0x%x, TEID 0x%016lx", - current->thread.gmap_int_code, current->thread.gmap_teid.val); - if (vcpu->arch.gmap->pfault_enabled) { - rc = gmap_fault(vcpu->arch.gmap, gaddr, flags | FAULT_FLAG_RETRY_NOWAIT); - if (rc == -EFAULT) - return vcpu_post_run_addressing_exception(vcpu); - if (rc == -EAGAIN) { - trace_kvm_s390_major_guest_pfault(vcpu); - if (kvm_arch_setup_async_pf(vcpu)) - return 0; - vcpu->stat.pfault_sync++; - } else { - return rc; - } - } - rc = gmap_fault(vcpu->arch.gmap, gaddr, flags); - if (rc == -EFAULT) { - if (kvm_is_ucontrol(vcpu->kvm)) { - vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; - vcpu->run->s390_ucontrol.trans_exc_code = gaddr; - vcpu->run->s390_ucontrol.pgm_code = 0x10; - return -EREMOTE; - } - return vcpu_post_run_addressing_exception(vcpu); - } - break; + kvm_s390_assert_primary_as(vcpu); + return vcpu_dat_fault_handler(vcpu, gaddr, flags); default: KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx", current->thread.gmap_int_code, current->thread.gmap_teid.val); send_sig(SIGSEGV, current, 0); break; } - return rc; + return 0; } static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) @@ -5737,7 +5889,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } #endif case KVM_S390_VCPU_FAULT: { - r = gmap_fault(vcpu->arch.gmap, arg, 0); + idx = srcu_read_lock(&vcpu->kvm->srcu); + r = vcpu_dat_fault_handler(vcpu, arg, 0); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; } case KVM_ENABLE_CAP: @@ -5853,7 +6007,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, { gpa_t size; - if (kvm_is_ucontrol(kvm)) + if (kvm_is_ucontrol(kvm) && new->id < KVM_USER_MEM_SLOTS) return -EINVAL; /* When we are protected, we should not change the memory slots */ @@ -5905,6 +6059,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, { int rc = 0; + if (kvm_is_ucontrol(kvm)) + return; + switch (change) { case KVM_MR_DELETE: rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 597d7a71deeb..8d3bbb2dd8d2 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -20,6 +20,8 @@ #include <asm/processor.h> #include <asm/sclp.h> +#define KVM_S390_UCONTROL_MEMSLOT (KVM_USER_MEM_SLOTS + 0) + static inline void kvm_s390_fpu_store(struct kvm_run *run) { fpu_stfpc(&run->s.regs.fpc); @@ -279,6 +281,15 @@ static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm) return gd; } +static inline hva_t gpa_to_hva(struct kvm *kvm, gpa_t gpa) +{ + hva_t hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); + + if (!kvm_is_error_hva(hva)) + hva |= offset_in_page(gpa); + return hva; +} + /* implemented in pv.c */ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); @@ -408,6 +419,14 @@ void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu); void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm); __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu); int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc); +int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags); +int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot, + unsigned long bits); + +static inline int kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gpa_t gaddr, unsigned int flags) +{ + return __kvm_s390_handle_dat_fault(vcpu, gpa_to_gfn(gaddr), gaddr, flags); +} /* implemented in diag.c */ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 75e81ba26d04..22c012aa5206 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -17,6 +17,7 @@ #include <linux/sched/mm.h> #include <linux/mmu_notifier.h> #include "kvm-s390.h" +#include "gmap.h" bool kvm_s390_pv_is_protected(struct kvm *kvm) { @@ -638,10 +639,28 @@ static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak, .tweak[1] = offset, }; int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb); + unsigned long vmaddr; + bool unlocked; *rc = uvcb.header.rc; *rrc = uvcb.header.rrc; + if (ret == -ENXIO) { + mmap_read_lock(kvm->mm); + vmaddr = gfn_to_hva(kvm, gpa_to_gfn(addr)); + if (kvm_is_error_hva(vmaddr)) { + ret = -EFAULT; + } else { + ret = fixup_user_fault(kvm->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); + if (!ret) + ret = __gmap_link(kvm->arch.gmap, addr, vmaddr); + } + mmap_read_unlock(kvm->mm); + if (!ret) + return -EAGAIN; + return ret; + } + if (ret && ret != -EAGAIN) KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x", uvcb.gaddr, *rc, *rrc); @@ -660,6 +679,8 @@ int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size, KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx", addr, size); + guard(srcu)(&kvm->srcu); + while (offset < size) { ret = unpack_one(kvm, addr, tweak, offset, rc, rrc); if (ret == -EAGAIN) { diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index a687695d8f68..a78df3a4f353 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -13,6 +13,7 @@ #include <linux/bitmap.h> #include <linux/sched/signal.h> #include <linux/io.h> +#include <linux/mman.h> #include <asm/gmap.h> #include <asm/mmu_context.h> @@ -22,6 +23,11 @@ #include <asm/facility.h> #include "kvm-s390.h" #include "gaccess.h" +#include "gmap.h" + +enum vsie_page_flags { + VSIE_PAGE_IN_USE = 0, +}; struct vsie_page { struct kvm_s390_sie_block scb_s; /* 0x0000 */ @@ -46,7 +52,18 @@ struct vsie_page { gpa_t gvrd_gpa; /* 0x0240 */ gpa_t riccbd_gpa; /* 0x0248 */ gpa_t sdnx_gpa; /* 0x0250 */ - __u8 reserved[0x0700 - 0x0258]; /* 0x0258 */ + /* + * guest address of the original SCB. Remains set for free vsie + * pages, so we can properly look them up in our addr_to_page + * radix tree. + */ + gpa_t scb_gpa; /* 0x0258 */ + /* + * Flags: must be set/cleared atomically after the vsie page can be + * looked up by other CPUs. + */ + unsigned long flags; /* 0x0260 */ + __u8 reserved[0x0700 - 0x0268]; /* 0x0268 */ struct kvm_s390_crypto_cb crycb; /* 0x0700 */ __u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */ }; @@ -584,7 +601,6 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start, struct kvm *kvm = gmap->private; struct vsie_page *cur; unsigned long prefix; - struct page *page; int i; if (!gmap_is_shadow(gmap)) @@ -594,10 +610,9 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start, * therefore we can safely reference them all the time. */ for (i = 0; i < kvm->arch.vsie.page_count; i++) { - page = READ_ONCE(kvm->arch.vsie.pages[i]); - if (!page) + cur = READ_ONCE(kvm->arch.vsie.pages[i]); + if (!cur) continue; - cur = page_to_virt(page); if (READ_ONCE(cur->gmap) != gmap) continue; prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT; @@ -1345,6 +1360,20 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) return rc; } +/* Try getting a given vsie page, returning "true" on success. */ +static inline bool try_get_vsie_page(struct vsie_page *vsie_page) +{ + if (test_bit(VSIE_PAGE_IN_USE, &vsie_page->flags)) + return false; + return !test_and_set_bit(VSIE_PAGE_IN_USE, &vsie_page->flags); +} + +/* Put a vsie page acquired through get_vsie_page / try_get_vsie_page. */ +static void put_vsie_page(struct vsie_page *vsie_page) +{ + clear_bit(VSIE_PAGE_IN_USE, &vsie_page->flags); +} + /* * Get or create a vsie page for a scb address. * @@ -1355,16 +1384,21 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) { struct vsie_page *vsie_page; - struct page *page; int nr_vcpus; rcu_read_lock(); - page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9); + vsie_page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9); rcu_read_unlock(); - if (page) { - if (page_ref_inc_return(page) == 2) - return page_to_virt(page); - page_ref_dec(page); + if (vsie_page) { + if (try_get_vsie_page(vsie_page)) { + if (vsie_page->scb_gpa == addr) + return vsie_page; + /* + * We raced with someone reusing + putting this vsie + * page before we grabbed it. + */ + put_vsie_page(vsie_page); + } } /* @@ -1375,36 +1409,40 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) mutex_lock(&kvm->arch.vsie.mutex); if (kvm->arch.vsie.page_count < nr_vcpus) { - page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA); - if (!page) { + vsie_page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA); + if (!vsie_page) { mutex_unlock(&kvm->arch.vsie.mutex); return ERR_PTR(-ENOMEM); } - page_ref_inc(page); - kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page; + __set_bit(VSIE_PAGE_IN_USE, &vsie_page->flags); + kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = vsie_page; kvm->arch.vsie.page_count++; } else { /* reuse an existing entry that belongs to nobody */ while (true) { - page = kvm->arch.vsie.pages[kvm->arch.vsie.next]; - if (page_ref_inc_return(page) == 2) + vsie_page = kvm->arch.vsie.pages[kvm->arch.vsie.next]; + if (try_get_vsie_page(vsie_page)) break; - page_ref_dec(page); kvm->arch.vsie.next++; kvm->arch.vsie.next %= nr_vcpus; } - radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9); + if (vsie_page->scb_gpa != ULONG_MAX) + radix_tree_delete(&kvm->arch.vsie.addr_to_page, + vsie_page->scb_gpa >> 9); } - page->index = addr; - /* double use of the same address */ - if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) { - page_ref_dec(page); + /* Mark it as invalid until it resides in the tree. */ + vsie_page->scb_gpa = ULONG_MAX; + + /* Double use of the same address or allocation failure. */ + if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, + vsie_page)) { + put_vsie_page(vsie_page); mutex_unlock(&kvm->arch.vsie.mutex); return NULL; } + vsie_page->scb_gpa = addr; mutex_unlock(&kvm->arch.vsie.mutex); - vsie_page = page_to_virt(page); memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block)); release_gmap_shadow(vsie_page); vsie_page->fault_addr = 0; @@ -1412,14 +1450,6 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) return vsie_page; } -/* put a vsie page acquired via get_vsie_page */ -static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page) -{ - struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT); - - page_ref_dec(page); -} - int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu) { struct vsie_page *vsie_page; @@ -1470,7 +1500,7 @@ int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu) out_unpin_scb: unpin_scb(vcpu, vsie_page, scb_addr); out_put: - put_vsie_page(vcpu->kvm, vsie_page); + put_vsie_page(vsie_page); return rc < 0 ? rc : 0; } @@ -1486,18 +1516,18 @@ void kvm_s390_vsie_init(struct kvm *kvm) void kvm_s390_vsie_destroy(struct kvm *kvm) { struct vsie_page *vsie_page; - struct page *page; int i; mutex_lock(&kvm->arch.vsie.mutex); for (i = 0; i < kvm->arch.vsie.page_count; i++) { - page = kvm->arch.vsie.pages[i]; + vsie_page = kvm->arch.vsie.pages[i]; kvm->arch.vsie.pages[i] = NULL; - vsie_page = page_to_virt(page); release_gmap_shadow(vsie_page); /* free the radix tree entry */ - radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9); - __free_page(page); + if (vsie_page->scb_gpa != ULONG_MAX) + radix_tree_delete(&kvm->arch.vsie.addr_to_page, + vsie_page->scb_gpa >> 9); + free_page((unsigned long)vsie_page); } kvm->arch.vsie.page_count = 0; mutex_unlock(&kvm->arch.vsie.mutex); diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 16b8a36c56de..94d927785800 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -24,6 +24,16 @@ #include <asm/page.h> #include <asm/tlb.h> +/* + * The address is saved in a radix tree directly; NULL would be ambiguous, + * since 0 is a valid address, and NULL is returned when nothing was found. + * The lower bits are ignored by all users of the macro, so it can be used + * to distinguish a valid address 0 from a NULL. + */ +#define VALID_GADDR_FLAG 1 +#define IS_GADDR_VALID(gaddr) ((gaddr) & VALID_GADDR_FLAG) +#define MAKE_VALID_GADDR(gaddr) (((gaddr) & HPAGE_MASK) | VALID_GADDR_FLAG) + #define GMAP_SHADOW_FAKE_TABLE 1ULL static struct page *gmap_alloc_crst(void) @@ -43,7 +53,7 @@ static struct page *gmap_alloc_crst(void) * * Returns a guest address space structure. */ -static struct gmap *gmap_alloc(unsigned long limit) +struct gmap *gmap_alloc(unsigned long limit) { struct gmap *gmap; struct page *page; @@ -70,9 +80,7 @@ static struct gmap *gmap_alloc(unsigned long limit) gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT); if (!gmap) goto out; - INIT_LIST_HEAD(&gmap->crst_list); INIT_LIST_HEAD(&gmap->children); - INIT_LIST_HEAD(&gmap->pt_list); INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT); INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT); INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT); @@ -82,8 +90,6 @@ static struct gmap *gmap_alloc(unsigned long limit) page = gmap_alloc_crst(); if (!page) goto out_free; - page->index = 0; - list_add(&page->lru, &gmap->crst_list); table = page_to_virt(page); crst_table_init(table, etype); gmap->table = table; @@ -97,6 +103,7 @@ static struct gmap *gmap_alloc(unsigned long limit) out: return NULL; } +EXPORT_SYMBOL_GPL(gmap_alloc); /** * gmap_create - create a guest address space @@ -185,32 +192,46 @@ static void gmap_rmap_radix_tree_free(struct radix_tree_root *root) } while (nr > 0); } +static void gmap_free_crst(unsigned long *table, bool free_ptes) +{ + bool is_segment = (table[0] & _SEGMENT_ENTRY_TYPE_MASK) == 0; + int i; + + if (is_segment) { + if (!free_ptes) + goto out; + for (i = 0; i < _CRST_ENTRIES; i++) + if (!(table[i] & _SEGMENT_ENTRY_INVALID)) + page_table_free_pgste(page_ptdesc(phys_to_page(table[i]))); + } else { + for (i = 0; i < _CRST_ENTRIES; i++) + if (!(table[i] & _REGION_ENTRY_INVALID)) + gmap_free_crst(__va(table[i] & PAGE_MASK), free_ptes); + } + +out: + free_pages((unsigned long)table, CRST_ALLOC_ORDER); +} + /** * gmap_free - free a guest address space * @gmap: pointer to the guest address space structure * * No locks required. There are no references to this gmap anymore. */ -static void gmap_free(struct gmap *gmap) +void gmap_free(struct gmap *gmap) { - struct page *page, *next; - /* Flush tlb of all gmaps (if not already done for shadows) */ if (!(gmap_is_shadow(gmap) && gmap->removed)) gmap_flush_tlb(gmap); /* Free all segment & region tables. */ - list_for_each_entry_safe(page, next, &gmap->crst_list, lru) - __free_pages(page, CRST_ALLOC_ORDER); + gmap_free_crst(gmap->table, gmap_is_shadow(gmap)); + gmap_radix_tree_free(&gmap->guest_to_host); gmap_radix_tree_free(&gmap->host_to_guest); /* Free additional data for a shadow gmap */ if (gmap_is_shadow(gmap)) { - struct ptdesc *ptdesc, *n; - - /* Free all page tables. */ - list_for_each_entry_safe(ptdesc, n, &gmap->pt_list, pt_list) - page_table_free_pgste(ptdesc); gmap_rmap_radix_tree_free(&gmap->host_to_rmap); /* Release reference to the parent */ gmap_put(gmap->parent); @@ -218,6 +239,7 @@ static void gmap_free(struct gmap *gmap) kfree(gmap); } +EXPORT_SYMBOL_GPL(gmap_free); /** * gmap_get - increase reference counter for guest address space @@ -298,10 +320,8 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, crst_table_init(new, init); spin_lock(&gmap->guest_table_lock); if (*table & _REGION_ENTRY_INVALID) { - list_add(&page->lru, &gmap->crst_list); *table = __pa(new) | _REGION_ENTRY_LENGTH | (*table & _REGION_ENTRY_TYPE_MASK); - page->index = gaddr; page = NULL; } spin_unlock(&gmap->guest_table_lock); @@ -310,21 +330,23 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, return 0; } -/** - * __gmap_segment_gaddr - find virtual address from segment pointer - * @entry: pointer to a segment table entry in the guest address space - * - * Returns the virtual address in the guest address space for the segment - */ -static unsigned long __gmap_segment_gaddr(unsigned long *entry) +static unsigned long host_to_guest_lookup(struct gmap *gmap, unsigned long vmaddr) { - struct page *page; - unsigned long offset; + return (unsigned long)radix_tree_lookup(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); +} - offset = (unsigned long) entry / sizeof(unsigned long); - offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE; - page = pmd_pgtable_page((pmd_t *) entry); - return page->index + offset; +static unsigned long host_to_guest_delete(struct gmap *gmap, unsigned long vmaddr) +{ + return (unsigned long)radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); +} + +static pmd_t *host_to_guest_pmd_delete(struct gmap *gmap, unsigned long vmaddr, + unsigned long *gaddr) +{ + *gaddr = host_to_guest_delete(gmap, vmaddr); + if (IS_GADDR_VALID(*gaddr)) + return (pmd_t *)gmap_table_walk(gmap, *gaddr, 1); + return NULL; } /** @@ -336,16 +358,19 @@ static unsigned long __gmap_segment_gaddr(unsigned long *entry) */ static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr) { - unsigned long *entry; + unsigned long gaddr; int flush = 0; + pmd_t *pmdp; BUG_ON(gmap_is_shadow(gmap)); spin_lock(&gmap->guest_table_lock); - entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); - if (entry) { - flush = (*entry != _SEGMENT_ENTRY_EMPTY); - *entry = _SEGMENT_ENTRY_EMPTY; + + pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr); + if (pmdp) { + flush = (pmd_val(*pmdp) != _SEGMENT_ENTRY_EMPTY); + *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); } + spin_unlock(&gmap->guest_table_lock); return flush; } @@ -463,26 +488,6 @@ unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) } EXPORT_SYMBOL_GPL(__gmap_translate); -/** - * gmap_translate - translate a guest address to a user space address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * - * Returns user space address which corresponds to the guest address or - * -EFAULT if no such mapping exists. - * This function does not establish potentially missing page table entries. - */ -unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr) -{ - unsigned long rc; - - mmap_read_lock(gmap->mm); - rc = __gmap_translate(gmap, gaddr); - mmap_read_unlock(gmap->mm); - return rc; -} -EXPORT_SYMBOL_GPL(gmap_translate); - /** * gmap_unlink - disconnect a page table from the gmap shadow tables * @mm: pointer to the parent mm_struct @@ -582,7 +587,8 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) spin_lock(&gmap->guest_table_lock); if (*table == _SEGMENT_ENTRY_EMPTY) { rc = radix_tree_insert(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT, table); + vmaddr >> PMD_SHIFT, + (void *)MAKE_VALID_GADDR(gaddr)); if (!rc) { if (pmd_leaf(*pmd)) { *table = (pmd_val(*pmd) & @@ -605,130 +611,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) radix_tree_preload_end(); return rc; } - -/** - * fixup_user_fault_nowait - manually resolve a user page fault without waiting - * @mm: mm_struct of target mm - * @address: user address - * @fault_flags:flags to pass down to handle_mm_fault() - * @unlocked: did we unlock the mmap_lock while retrying - * - * This function behaves similarly to fixup_user_fault(), but it guarantees - * that the fault will be resolved without waiting. The function might drop - * and re-acquire the mm lock, in which case @unlocked will be set to true. - * - * The guarantee is that the fault is handled without waiting, but the - * function itself might sleep, due to the lock. - * - * Context: Needs to be called with mm->mmap_lock held in read mode, and will - * return with the lock held in read mode; @unlocked will indicate whether - * the lock has been dropped and re-acquired. This is the same behaviour as - * fixup_user_fault(). - * - * Return: 0 on success, -EAGAIN if the fault cannot be resolved without - * waiting, -EFAULT if the fault cannot be resolved, -ENOMEM if out of - * memory. - */ -static int fixup_user_fault_nowait(struct mm_struct *mm, unsigned long address, - unsigned int fault_flags, bool *unlocked) -{ - struct vm_area_struct *vma; - unsigned int test_flags; - vm_fault_t fault; - int rc; - - fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT; - test_flags = fault_flags & FAULT_FLAG_WRITE ? VM_WRITE : VM_READ; - - vma = find_vma(mm, address); - if (unlikely(!vma || address < vma->vm_start)) - return -EFAULT; - if (unlikely(!(vma->vm_flags & test_flags))) - return -EFAULT; - - fault = handle_mm_fault(vma, address, fault_flags, NULL); - /* the mm lock has been dropped, take it again */ - if (fault & VM_FAULT_COMPLETED) { - *unlocked = true; - mmap_read_lock(mm); - return 0; - } - /* the mm lock has not been dropped */ - if (fault & VM_FAULT_ERROR) { - rc = vm_fault_to_errno(fault, 0); - BUG_ON(!rc); - return rc; - } - /* the mm lock has not been dropped because of FAULT_FLAG_RETRY_NOWAIT */ - if (fault & VM_FAULT_RETRY) - return -EAGAIN; - /* nothing needed to be done and the mm lock has not been dropped */ - return 0; -} - -/** - * __gmap_fault - resolve a fault on a guest address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * @fault_flags: flags to pass down to handle_mm_fault() - * - * Context: Needs to be called with mm->mmap_lock held in read mode. Might - * drop and re-acquire the lock. Will always return with the lock held. - */ -static int __gmap_fault(struct gmap *gmap, unsigned long gaddr, unsigned int fault_flags) -{ - unsigned long vmaddr; - bool unlocked; - int rc = 0; - -retry: - unlocked = false; - - vmaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(vmaddr)) - return vmaddr; - - if (fault_flags & FAULT_FLAG_RETRY_NOWAIT) - rc = fixup_user_fault_nowait(gmap->mm, vmaddr, fault_flags, &unlocked); - else - rc = fixup_user_fault(gmap->mm, vmaddr, fault_flags, &unlocked); - if (rc) - return rc; - /* - * In the case that fixup_user_fault unlocked the mmap_lock during - * fault-in, redo __gmap_translate() to avoid racing with a - * map/unmap_segment. - * In particular, __gmap_translate(), fixup_user_fault{,_nowait}(), - * and __gmap_link() must all be called atomically in one go; if the - * lock had been dropped in between, a retry is needed. - */ - if (unlocked) - goto retry; - - return __gmap_link(gmap, gaddr, vmaddr); -} - -/** - * gmap_fault - resolve a fault on a guest address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * @fault_flags: flags to pass down to handle_mm_fault() - * - * Returns 0 on success, -ENOMEM for out of memory conditions, -EFAULT if the - * vm address is already mapped to a different guest segment, and -EAGAIN if - * FAULT_FLAG_RETRY_NOWAIT was specified and the fault could not be processed - * immediately. - */ -int gmap_fault(struct gmap *gmap, unsigned long gaddr, unsigned int fault_flags) -{ - int rc; - - mmap_read_lock(gmap->mm); - rc = __gmap_fault(gmap, gaddr, fault_flags); - mmap_read_unlock(gmap->mm); - return rc; -} -EXPORT_SYMBOL_GPL(gmap_fault); +EXPORT_SYMBOL(__gmap_link); /* * this function is assumed to be called with mmap_lock held @@ -853,8 +736,7 @@ static void gmap_call_notifier(struct gmap *gmap, unsigned long start, * * Note: Can also be called for shadow gmaps. */ -static inline unsigned long *gmap_table_walk(struct gmap *gmap, - unsigned long gaddr, int level) +unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level) { const int asce_type = gmap->asce & _ASCE_TYPE_MASK; unsigned long *table = gmap->table; @@ -905,6 +787,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, } return table; } +EXPORT_SYMBOL(gmap_table_walk); /** * gmap_pte_op_walk - walk the gmap page table, get the page table lock @@ -1101,86 +984,40 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr, * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE * @bits: pgste notification bits to set * - * Returns 0 if successfully protected, -ENOMEM if out of memory and - * -EFAULT if gaddr is invalid (or mapping for shadows is missing). + * Returns: + * PAGE_SIZE if a small page was successfully protected; + * HPAGE_SIZE if a large page was successfully protected; + * -ENOMEM if out of memory; + * -EFAULT if gaddr is invalid (or mapping for shadows is missing); + * -EAGAIN if the guest mapping is missing and should be fixed by the caller. * - * Called with sg->mm->mmap_lock in read. + * Context: Called with sg->mm->mmap_lock in read. */ -static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr, - unsigned long len, int prot, unsigned long bits) +int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits) { - unsigned long vmaddr, dist; pmd_t *pmdp; - int rc; + int rc = 0; BUG_ON(gmap_is_shadow(gmap)); - while (len) { - rc = -EAGAIN; - pmdp = gmap_pmd_op_walk(gmap, gaddr); - if (pmdp) { - if (!pmd_leaf(*pmdp)) { - rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, - bits); - if (!rc) { - len -= PAGE_SIZE; - gaddr += PAGE_SIZE; - } - } else { - rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, - bits); - if (!rc) { - dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK); - len = len < dist ? 0 : len - dist; - gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE; - } - } - gmap_pmd_op_end(gmap, pmdp); - } - if (rc) { - if (rc == -EINVAL) - return rc; - /* -EAGAIN, fixup of userspace mm and gmap */ - vmaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(vmaddr)) - return vmaddr; - rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot); - if (rc) - return rc; - } - } - return 0; -} + pmdp = gmap_pmd_op_walk(gmap, gaddr); + if (!pmdp) + return -EAGAIN; -/** - * gmap_mprotect_notify - change access rights for a range of ptes and - * call the notifier if any pte changes again - * @gmap: pointer to guest mapping meta data structure - * @gaddr: virtual address in the guest address space - * @len: size of area - * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE - * - * Returns 0 if for each page in the given range a gmap mapping exists, - * the new access rights could be set and the notifier could be armed. - * If the gmap mapping is missing for one or more pages -EFAULT is - * returned. If no memory could be allocated -ENOMEM is returned. - * This function establishes missing page table entries. - */ -int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr, - unsigned long len, int prot) -{ - int rc; + if (!pmd_leaf(*pmdp)) { + rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, bits); + if (!rc) + rc = PAGE_SIZE; + } else { + rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, bits); + if (!rc) + rc = HPAGE_SIZE; + } + gmap_pmd_op_end(gmap, pmdp); - if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK) || gmap_is_shadow(gmap)) - return -EINVAL; - if (!MACHINE_HAS_ESOP && prot == PROT_READ) - return -EINVAL; - mmap_read_lock(gmap->mm); - rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT); - mmap_read_unlock(gmap->mm); return rc; } -EXPORT_SYMBOL_GPL(gmap_mprotect_notify); +EXPORT_SYMBOL_GPL(gmap_protect_one); /** * gmap_read_table - get an unsigned long value from a guest page table using @@ -1414,7 +1251,6 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr) __gmap_unshadow_pgt(sg, raddr, __va(pgt)); /* Free page table */ ptdesc = page_ptdesc(phys_to_page(pgt)); - list_del(&ptdesc->pt_list); page_table_free_pgste(ptdesc); } @@ -1442,7 +1278,6 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr, __gmap_unshadow_pgt(sg, raddr, __va(pgt)); /* Free page table */ ptdesc = page_ptdesc(phys_to_page(pgt)); - list_del(&ptdesc->pt_list); page_table_free_pgste(ptdesc); } } @@ -1472,7 +1307,6 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr) __gmap_unshadow_sgt(sg, raddr, __va(sgt)); /* Free segment table */ page = phys_to_page(sgt); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1500,7 +1334,6 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr, __gmap_unshadow_sgt(sg, raddr, __va(sgt)); /* Free segment table */ page = phys_to_page(sgt); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } } @@ -1530,7 +1363,6 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr) __gmap_unshadow_r3t(sg, raddr, __va(r3t)); /* Free region 3 table */ page = phys_to_page(r3t); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1558,7 +1390,6 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr, __gmap_unshadow_r3t(sg, raddr, __va(r3t)); /* Free region 3 table */ page = phys_to_page(r3t); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } } @@ -1588,7 +1419,6 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr) __gmap_unshadow_r2t(sg, raddr, __va(r2t)); /* Free region 2 table */ page = phys_to_page(r2t); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1620,7 +1450,6 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr, r1t[i] = _REGION1_ENTRY_EMPTY; /* Free region 2 table */ page = phys_to_page(r2t); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } } @@ -1631,7 +1460,7 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr, * * Called with sg->guest_table_lock */ -static void gmap_unshadow(struct gmap *sg) +void gmap_unshadow(struct gmap *sg) { unsigned long *table; @@ -1657,143 +1486,7 @@ static void gmap_unshadow(struct gmap *sg) break; } } - -/** - * gmap_find_shadow - find a specific asce in the list of shadow tables - * @parent: pointer to the parent gmap - * @asce: ASCE for which the shadow table is created - * @edat_level: edat level to be used for the shadow translation - * - * Returns the pointer to a gmap if a shadow table with the given asce is - * already available, ERR_PTR(-EAGAIN) if another one is just being created, - * otherwise NULL - */ -static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce, - int edat_level) -{ - struct gmap *sg; - - list_for_each_entry(sg, &parent->children, list) { - if (sg->orig_asce != asce || sg->edat_level != edat_level || - sg->removed) - continue; - if (!sg->initialized) - return ERR_PTR(-EAGAIN); - refcount_inc(&sg->ref_count); - return sg; - } - return NULL; -} - -/** - * gmap_shadow_valid - check if a shadow guest address space matches the - * given properties and is still valid - * @sg: pointer to the shadow guest address space structure - * @asce: ASCE for which the shadow table is requested - * @edat_level: edat level to be used for the shadow translation - * - * Returns 1 if the gmap shadow is still valid and matches the given - * properties, the caller can continue using it. Returns 0 otherwise, the - * caller has to request a new shadow gmap in this case. - * - */ -int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level) -{ - if (sg->removed) - return 0; - return sg->orig_asce == asce && sg->edat_level == edat_level; -} -EXPORT_SYMBOL_GPL(gmap_shadow_valid); - -/** - * gmap_shadow - create/find a shadow guest address space - * @parent: pointer to the parent gmap - * @asce: ASCE for which the shadow table is created - * @edat_level: edat level to be used for the shadow translation - * - * The pages of the top level page table referred by the asce parameter - * will be set to read-only and marked in the PGSTEs of the kvm process. - * The shadow table will be removed automatically on any change to the - * PTE mapping for the source table. - * - * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory, - * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the - * parent gmap table could not be protected. - */ -struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, - int edat_level) -{ - struct gmap *sg, *new; - unsigned long limit; - int rc; - - BUG_ON(parent->mm->context.allow_gmap_hpage_1m); - BUG_ON(gmap_is_shadow(parent)); - spin_lock(&parent->shadow_lock); - sg = gmap_find_shadow(parent, asce, edat_level); - spin_unlock(&parent->shadow_lock); - if (sg) - return sg; - /* Create a new shadow gmap */ - limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11)); - if (asce & _ASCE_REAL_SPACE) - limit = -1UL; - new = gmap_alloc(limit); - if (!new) - return ERR_PTR(-ENOMEM); - new->mm = parent->mm; - new->parent = gmap_get(parent); - new->private = parent->private; - new->orig_asce = asce; - new->edat_level = edat_level; - new->initialized = false; - spin_lock(&parent->shadow_lock); - /* Recheck if another CPU created the same shadow */ - sg = gmap_find_shadow(parent, asce, edat_level); - if (sg) { - spin_unlock(&parent->shadow_lock); - gmap_free(new); - return sg; - } - if (asce & _ASCE_REAL_SPACE) { - /* only allow one real-space gmap shadow */ - list_for_each_entry(sg, &parent->children, list) { - if (sg->orig_asce & _ASCE_REAL_SPACE) { - spin_lock(&sg->guest_table_lock); - gmap_unshadow(sg); - spin_unlock(&sg->guest_table_lock); - list_del(&sg->list); - gmap_put(sg); - break; - } - } - } - refcount_set(&new->ref_count, 2); - list_add(&new->list, &parent->children); - if (asce & _ASCE_REAL_SPACE) { - /* nothing to protect, return right away */ - new->initialized = true; - spin_unlock(&parent->shadow_lock); - return new; - } - spin_unlock(&parent->shadow_lock); - /* protect after insertion, so it will get properly invalidated */ - mmap_read_lock(parent->mm); - rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN, - ((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE, - PROT_READ, GMAP_NOTIFY_SHADOW); - mmap_read_unlock(parent->mm); - spin_lock(&parent->shadow_lock); - new->initialized = true; - if (rc) { - list_del(&new->list); - gmap_free(new); - new = ERR_PTR(rc); - } - spin_unlock(&parent->shadow_lock); - return new; -} -EXPORT_SYMBOL_GPL(gmap_shadow); +EXPORT_SYMBOL(gmap_unshadow); /** * gmap_shadow_r2t - create an empty shadow region 2 table @@ -1827,9 +1520,6 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, page = gmap_alloc_crst(); if (!page) return -ENOMEM; - page->index = r2t & _REGION_ENTRY_ORIGIN; - if (fake) - page->index |= GMAP_SHADOW_FAKE_TABLE; s_r2t = page_to_phys(page); /* Install shadow region second table */ spin_lock(&sg->guest_table_lock); @@ -1851,7 +1541,6 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID; if (sg->edat_level >= 1) *table |= (r2t & _REGION_ENTRY_PROTECT); - list_add(&page->lru, &sg->crst_list); if (fake) { /* nothing to protect for fake tables */ *table &= ~_REGION_ENTRY_INVALID; @@ -1911,9 +1600,6 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, page = gmap_alloc_crst(); if (!page) return -ENOMEM; - page->index = r3t & _REGION_ENTRY_ORIGIN; - if (fake) - page->index |= GMAP_SHADOW_FAKE_TABLE; s_r3t = page_to_phys(page); /* Install shadow region second table */ spin_lock(&sg->guest_table_lock); @@ -1935,7 +1621,6 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID; if (sg->edat_level >= 1) *table |= (r3t & _REGION_ENTRY_PROTECT); - list_add(&page->lru, &sg->crst_list); if (fake) { /* nothing to protect for fake tables */ *table &= ~_REGION_ENTRY_INVALID; @@ -1995,9 +1680,6 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, page = gmap_alloc_crst(); if (!page) return -ENOMEM; - page->index = sgt & _REGION_ENTRY_ORIGIN; - if (fake) - page->index |= GMAP_SHADOW_FAKE_TABLE; s_sgt = page_to_phys(page); /* Install shadow region second table */ spin_lock(&sg->guest_table_lock); @@ -2019,7 +1701,6 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID; if (sg->edat_level >= 1) *table |= sgt & _REGION_ENTRY_PROTECT; - list_add(&page->lru, &sg->crst_list); if (fake) { /* nothing to protect for fake tables */ *table &= ~_REGION_ENTRY_INVALID; @@ -2052,45 +1733,22 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, } EXPORT_SYMBOL_GPL(gmap_shadow_sgt); -/** - * gmap_shadow_pgt_lookup - find a shadow page table - * @sg: pointer to the shadow guest address space structure - * @saddr: the address in the shadow aguest address space - * @pgt: parent gmap address of the page table to get shadowed - * @dat_protection: if the pgtable is marked as protected by dat - * @fake: pgt references contiguous guest memory block, not a pgtable - * - * Returns 0 if the shadow page table was found and -EAGAIN if the page - * table was not found. - * - * Called with sg->mm->mmap_lock in read. - */ -int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, - unsigned long *pgt, int *dat_protection, - int *fake) +static void gmap_pgste_set_pgt_addr(struct ptdesc *ptdesc, unsigned long pgt_addr) { - unsigned long *table; - struct page *page; - int rc; + unsigned long *pgstes = page_to_virt(ptdesc_page(ptdesc)); - BUG_ON(!gmap_is_shadow(sg)); - spin_lock(&sg->guest_table_lock); - table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */ - if (table && !(*table & _SEGMENT_ENTRY_INVALID)) { - /* Shadow page tables are full pages (pte+pgste) */ - page = pfn_to_page(*table >> PAGE_SHIFT); - *pgt = page->index & ~GMAP_SHADOW_FAKE_TABLE; - *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT); - *fake = !!(page->index & GMAP_SHADOW_FAKE_TABLE); - rc = 0; - } else { - rc = -EAGAIN; - } - spin_unlock(&sg->guest_table_lock); - return rc; + pgstes += _PAGE_ENTRIES; + + pgstes[0] &= ~PGSTE_ST2_MASK; + pgstes[1] &= ~PGSTE_ST2_MASK; + pgstes[2] &= ~PGSTE_ST2_MASK; + pgstes[3] &= ~PGSTE_ST2_MASK; + pgstes[0] |= (pgt_addr >> 16) & PGSTE_ST2_MASK; + pgstes[1] |= pgt_addr & PGSTE_ST2_MASK; + pgstes[2] |= (pgt_addr << 16) & PGSTE_ST2_MASK; + pgstes[3] |= (pgt_addr << 32) & PGSTE_ST2_MASK; } -EXPORT_SYMBOL_GPL(gmap_shadow_pgt_lookup); /** * gmap_shadow_pgt - instantiate a shadow page table @@ -2119,9 +1777,10 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, ptdesc = page_table_alloc_pgste(sg->mm); if (!ptdesc) return -ENOMEM; - ptdesc->pt_index = pgt & _SEGMENT_ENTRY_ORIGIN; + origin = pgt & _SEGMENT_ENTRY_ORIGIN; if (fake) - ptdesc->pt_index |= GMAP_SHADOW_FAKE_TABLE; + origin |= GMAP_SHADOW_FAKE_TABLE; + gmap_pgste_set_pgt_addr(ptdesc, origin); s_pgt = page_to_phys(ptdesc_page(ptdesc)); /* Install shadow page table */ spin_lock(&sg->guest_table_lock); @@ -2140,7 +1799,6 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, /* mark as invalid as long as the parent table is not protected */ *table = (unsigned long) s_pgt | _SEGMENT_ENTRY | (pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID; - list_add(&ptdesc->pt_list, &sg->pt_list); if (fake) { /* nothing to protect for fake tables */ *table &= ~_SEGMENT_ENTRY_INVALID; @@ -2318,7 +1976,6 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte, unsigned long bits) { unsigned long offset, gaddr = 0; - unsigned long *table; struct gmap *gmap, *sg, *next; offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); @@ -2326,12 +1983,9 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, rcu_read_lock(); list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { spin_lock(&gmap->guest_table_lock); - table = radix_tree_lookup(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT); - if (table) - gaddr = __gmap_segment_gaddr(table) + offset; + gaddr = host_to_guest_lookup(gmap, vmaddr) + offset; spin_unlock(&gmap->guest_table_lock); - if (!table) + if (!IS_GADDR_VALID(gaddr)) continue; if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) { @@ -2391,10 +2045,8 @@ static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr, rcu_read_lock(); list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { spin_lock(&gmap->guest_table_lock); - pmdp = (pmd_t *)radix_tree_delete(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT); + pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr); if (pmdp) { - gaddr = __gmap_segment_gaddr((unsigned long *)pmdp); pmdp_notify_gmap(gmap, pmdp, gaddr); WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | _SEGMENT_ENTRY_GMAP_UC | @@ -2438,28 +2090,25 @@ EXPORT_SYMBOL_GPL(gmap_pmdp_csp); */ void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr) { - unsigned long *entry, gaddr; + unsigned long gaddr; struct gmap *gmap; pmd_t *pmdp; rcu_read_lock(); list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { spin_lock(&gmap->guest_table_lock); - entry = radix_tree_delete(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT); - if (entry) { - pmdp = (pmd_t *)entry; - gaddr = __gmap_segment_gaddr(entry); + pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr); + if (pmdp) { pmdp_notify_gmap(gmap, pmdp, gaddr); - WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | - _SEGMENT_ENTRY_GMAP_UC | - _SEGMENT_ENTRY)); + WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | + _SEGMENT_ENTRY_GMAP_UC | + _SEGMENT_ENTRY)); if (MACHINE_HAS_TLB_GUEST) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_LOCAL); else if (MACHINE_HAS_IDTE) __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL); - *entry = _SEGMENT_ENTRY_EMPTY; + *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); } spin_unlock(&gmap->guest_table_lock); } @@ -2474,22 +2123,19 @@ EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local); */ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr) { - unsigned long *entry, gaddr; + unsigned long gaddr; struct gmap *gmap; pmd_t *pmdp; rcu_read_lock(); list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { spin_lock(&gmap->guest_table_lock); - entry = radix_tree_delete(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT); - if (entry) { - pmdp = (pmd_t *)entry; - gaddr = __gmap_segment_gaddr(entry); + pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr); + if (pmdp) { pmdp_notify_gmap(gmap, pmdp, gaddr); - WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | - _SEGMENT_ENTRY_GMAP_UC | - _SEGMENT_ENTRY)); + WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | + _SEGMENT_ENTRY_GMAP_UC | + _SEGMENT_ENTRY)); if (MACHINE_HAS_TLB_GUEST) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_GLOBAL); @@ -2497,7 +2143,7 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr) __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL); else __pmdp_csp(pmdp); - *entry = _SEGMENT_ENTRY_EMPTY; + *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); } spin_unlock(&gmap->guest_table_lock); } @@ -2942,49 +2588,6 @@ int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start, } EXPORT_SYMBOL_GPL(__s390_uv_destroy_range); -/** - * s390_unlist_old_asce - Remove the topmost level of page tables from the - * list of page tables of the gmap. - * @gmap: the gmap whose table is to be removed - * - * On s390x, KVM keeps a list of all pages containing the page tables of the - * gmap (the CRST list). This list is used at tear down time to free all - * pages that are now not needed anymore. - * - * This function removes the topmost page of the tree (the one pointed to by - * the ASCE) from the CRST list. - * - * This means that it will not be freed when the VM is torn down, and needs - * to be handled separately by the caller, unless a leak is actually - * intended. Notice that this function will only remove the page from the - * list, the page will still be used as a top level page table (and ASCE). - */ -void s390_unlist_old_asce(struct gmap *gmap) -{ - struct page *old; - - old = virt_to_page(gmap->table); - spin_lock(&gmap->guest_table_lock); - list_del(&old->lru); - /* - * Sometimes the topmost page might need to be "removed" multiple - * times, for example if the VM is rebooted into secure mode several - * times concurrently, or if s390_replace_asce fails after calling - * s390_remove_old_asce and is attempted again later. In that case - * the old asce has been removed from the list, and therefore it - * will not be freed when the VM terminates, but the ASCE is still - * in use and still pointed to. - * A subsequent call to replace_asce will follow the pointer and try - * to remove the same page from the list again. - * Therefore it's necessary that the page of the ASCE has valid - * pointers, so list_del can work (and do nothing) without - * dereferencing stale or invalid pointers. - */ - INIT_LIST_HEAD(&old->lru); - spin_unlock(&gmap->guest_table_lock); -} -EXPORT_SYMBOL_GPL(s390_unlist_old_asce); - /** * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy * @gmap: the gmap whose ASCE needs to be replaced @@ -3004,8 +2607,6 @@ int s390_replace_asce(struct gmap *gmap) struct page *page; void *table; - s390_unlist_old_asce(gmap); - /* Replacing segment type ASCEs would cause serious issues */ if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT) return -EINVAL; @@ -3013,19 +2614,9 @@ int s390_replace_asce(struct gmap *gmap) page = gmap_alloc_crst(); if (!page) return -ENOMEM; - page->index = 0; table = page_to_virt(page); memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT)); - /* - * The caller has to deal with the old ASCE, but here we make sure - * the new one is properly added to the CRST list, so that - * it will be freed when the VM is torn down. - */ - spin_lock(&gmap->guest_table_lock); - list_add(&page->lru, &gmap->crst_list); - spin_unlock(&gmap->guest_table_lock); - /* Set new table origin while preserving existing ASCE control bits */ asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table); WRITE_ONCE(gmap->asce, asce); @@ -3035,3 +2626,31 @@ int s390_replace_asce(struct gmap *gmap) return 0; } EXPORT_SYMBOL_GPL(s390_replace_asce); + +/** + * kvm_s390_wiggle_split_folio() - try to drain extra references to a folio and optionally split + * @mm: the mm containing the folio to work on + * @folio: the folio + * @split: whether to split a large folio + * + * Context: Must be called while holding an extra reference to the folio; + * the mm lock should not be held. + */ +int kvm_s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split) +{ + int rc; + + lockdep_assert_not_held(&mm->mmap_lock); + folio_wait_writeback(folio); + lru_add_drain_all(); + if (split) { + folio_lock(folio); + rc = split_folio(folio); + folio_unlock(folio); + + if (rc != -EBUSY) + return rc; + } + return -EAGAIN; +} +EXPORT_SYMBOL_GPL(kvm_s390_wiggle_split_folio); diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index cd2fef79ad2c..30387a6e98ff 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -176,8 +176,6 @@ unsigned long *page_table_alloc(struct mm_struct *mm) } table = ptdesc_to_virt(ptdesc); __arch_set_page_dat(table, 1); - /* pt_list is used by gmap only */ - INIT_LIST_HEAD(&ptdesc->pt_list); memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE); memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE); return table; diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index f2051644de94..606c74f27459 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -25,6 +25,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ # avoid errors with '-march=i386', and future flags may depend on the target to # be valid. KBUILD_CFLAGS := -m$(BITS) -O2 $(CLANG_FLAGS) +KBUILD_CFLAGS += -std=gnu11 KBUILD_CFLAGS += -fno-strict-aliasing -fPIE KBUILD_CFLAGS += -Wundef KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 2cbb3874ad39..8eb3a88707f2 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -1180,7 +1180,7 @@ void kvm_set_cpu_caps(void) SYNTHESIZED_F(SBPB), SYNTHESIZED_F(IBPB_BRTYPE), SYNTHESIZED_F(SRSO_NO), - SYNTHESIZED_F(SRSO_USER_KERNEL_NO), + F(SRSO_USER_KERNEL_NO), ); kvm_cpu_cap_init(CPUID_8000_0022_EAX, diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index a45ae60e84ab..74c20dbb92da 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -7120,6 +7120,19 @@ static void mmu_destroy_caches(void) kmem_cache_destroy(mmu_page_header_cache); } +static void kvm_wake_nx_recovery_thread(struct kvm *kvm) +{ + /* + * The NX recovery thread is spawned on-demand at the first KVM_RUN and + * may not be valid even though the VM is globally visible. Do nothing, + * as such a VM can't have any possible NX huge pages. + */ + struct vhost_task *nx_thread = READ_ONCE(kvm->arch.nx_huge_page_recovery_thread); + + if (nx_thread) + vhost_task_wake(nx_thread); +} + static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp) { if (nx_hugepage_mitigation_hard_disabled) @@ -7180,7 +7193,7 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) kvm_mmu_zap_all_fast(kvm); mutex_unlock(&kvm->slots_lock); - vhost_task_wake(kvm->arch.nx_huge_page_recovery_thread); + kvm_wake_nx_recovery_thread(kvm); } mutex_unlock(&kvm_lock); } @@ -7315,7 +7328,7 @@ static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) - vhost_task_wake(kvm->arch.nx_huge_page_recovery_thread); + kvm_wake_nx_recovery_thread(kvm); mutex_unlock(&kvm_lock); } @@ -7451,14 +7464,20 @@ static void kvm_mmu_start_lpage_recovery(struct once *once) { struct kvm_arch *ka = container_of(once, struct kvm_arch, nx_once); struct kvm *kvm = container_of(ka, struct kvm, arch); + struct vhost_task *nx_thread; kvm->arch.nx_huge_page_last = get_jiffies_64(); - kvm->arch.nx_huge_page_recovery_thread = vhost_task_create( - kvm_nx_huge_page_recovery_worker, kvm_nx_huge_page_recovery_worker_kill, - kvm, "kvm-nx-lpage-recovery"); + nx_thread = vhost_task_create(kvm_nx_huge_page_recovery_worker, + kvm_nx_huge_page_recovery_worker_kill, + kvm, "kvm-nx-lpage-recovery"); - if (kvm->arch.nx_huge_page_recovery_thread) - vhost_task_start(kvm->arch.nx_huge_page_recovery_thread); + if (!nx_thread) + return; + + vhost_task_start(nx_thread); + + /* Make the task visible only once it is fully started. */ + WRITE_ONCE(kvm->arch.nx_huge_page_recovery_thread, nx_thread); } int kvm_mmu_post_init_vm(struct kvm *kvm) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6d4a6734b2d6..8e77e61d4fbd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12741,6 +12741,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) "does not run without ignore_msrs=1, please report it to kvm@vger.kernel.org.\n"); } + once_init(&kvm->arch.nx_once); return 0; out_uninit_mmu: @@ -12750,12 +12751,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) return ret; } -int kvm_arch_post_init_vm(struct kvm *kvm) -{ - once_init(&kvm->arch.nx_once); - return 0; -} - static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) { vcpu_load(vcpu); diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 9252652afe59..894edf8d6d62 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -100,9 +100,6 @@ SYM_FUNC_START(xen_hypercall_hvm) push %r10 push %r9 push %r8 -#ifdef CONFIG_FRAME_POINTER - pushq $0 /* Dummy push for stack alignment. */ -#endif #endif /* Set the vendor specific function. */ call __xen_hypercall_setfunc @@ -117,11 +114,8 @@ SYM_FUNC_START(xen_hypercall_hvm) pop %ebx pop %eax #else - lea xen_hypercall_amd(%rip), %rbx - cmp %rax, %rbx -#ifdef CONFIG_FRAME_POINTER - pop %rax /* Dummy pop. */ -#endif + lea xen_hypercall_amd(%rip), %rcx + cmp %rax, %rcx pop %r8 pop %r9 pop %r10 @@ -132,6 +126,7 @@ SYM_FUNC_START(xen_hypercall_hvm) pop %rcx pop %rax #endif + FRAME_END /* Use correct hypercall function. */ jz xen_hypercall_amd jmp xen_hypercall_intel diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c index 97d4a032171f..f5b8497cf5ad 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c @@ -21,6 +21,11 @@ #define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */ +MODULE_FIRMWARE("amdnpu/1502_00/npu.sbin"); +MODULE_FIRMWARE("amdnpu/17f0_10/npu.sbin"); +MODULE_FIRMWARE("amdnpu/17f0_11/npu.sbin"); +MODULE_FIRMWARE("amdnpu/17f0_20/npu.sbin"); + /* * Bind the driver base on (vendor_id, device_id) pair and later use the * (device_id, rev_id) pair as a key to select the devices. The devices with diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 1e8ffbe25eee..38cf1c342c72 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -397,15 +397,19 @@ int ivpu_boot(struct ivpu_device *vdev) if (ivpu_fw_is_cold_boot(vdev)) { ret = ivpu_pm_dct_init(vdev); if (ret) - goto err_diagnose_failure; + goto err_disable_ipc; ret = ivpu_hw_sched_init(vdev); if (ret) - goto err_diagnose_failure; + goto err_disable_ipc; } return 0; +err_disable_ipc: + ivpu_ipc_disable(vdev); + ivpu_hw_irq_disable(vdev); + disable_irq(vdev->irq); err_diagnose_failure: ivpu_hw_diagnose_failure(vdev); ivpu_mmu_evtq_dump(vdev); diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index 87d7411ae059..5060c5dd40d1 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -115,41 +115,57 @@ static int ivpu_resume(struct ivpu_device *vdev) return ret; } -static void ivpu_pm_recovery_work(struct work_struct *work) +static void ivpu_pm_reset_begin(struct ivpu_device *vdev) { - struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work); - struct ivpu_device *vdev = pm->vdev; - char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL}; - int ret; - - ivpu_err(vdev, "Recovering the NPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter)); - - ret = pm_runtime_resume_and_get(vdev->drm.dev); - if (ret) - ivpu_err(vdev, "Failed to resume NPU: %d\n", ret); - - ivpu_jsm_state_dump(vdev); - ivpu_dev_coredump(vdev); + pm_runtime_disable(vdev->drm.dev); atomic_inc(&vdev->pm->reset_counter); atomic_set(&vdev->pm->reset_pending, 1); down_write(&vdev->pm->reset_lock); +} + +static void ivpu_pm_reset_complete(struct ivpu_device *vdev) +{ + int ret; - ivpu_suspend(vdev); ivpu_pm_prepare_cold_boot(vdev); ivpu_jobs_abort_all(vdev); ivpu_ms_cleanup_all(vdev); ret = ivpu_resume(vdev); - if (ret) + if (ret) { ivpu_err(vdev, "Failed to resume NPU: %d\n", ret); + pm_runtime_set_suspended(vdev->drm.dev); + } else { + pm_runtime_set_active(vdev->drm.dev); + } up_write(&vdev->pm->reset_lock); atomic_set(&vdev->pm->reset_pending, 0); - kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt); pm_runtime_mark_last_busy(vdev->drm.dev); - pm_runtime_put_autosuspend(vdev->drm.dev); + pm_runtime_enable(vdev->drm.dev); +} + +static void ivpu_pm_recovery_work(struct work_struct *work) +{ + struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work); + struct ivpu_device *vdev = pm->vdev; + char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL}; + + ivpu_err(vdev, "Recovering the NPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter)); + + ivpu_pm_reset_begin(vdev); + + if (!pm_runtime_status_suspended(vdev->drm.dev)) { + ivpu_jsm_state_dump(vdev); + ivpu_dev_coredump(vdev); + ivpu_suspend(vdev); + } + + ivpu_pm_reset_complete(vdev); + + kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt); } void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason) @@ -309,7 +325,10 @@ int ivpu_rpm_get(struct ivpu_device *vdev) int ret; ret = pm_runtime_resume_and_get(vdev->drm.dev); - drm_WARN_ON(&vdev->drm, ret < 0); + if (ret < 0) { + ivpu_err(vdev, "Failed to resume NPU: %d\n", ret); + pm_runtime_set_suspended(vdev->drm.dev); + } return ret; } @@ -325,16 +344,13 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev) struct ivpu_device *vdev = pci_get_drvdata(pdev); ivpu_dbg(vdev, PM, "Pre-reset..\n"); - atomic_inc(&vdev->pm->reset_counter); - atomic_set(&vdev->pm->reset_pending, 1); - pm_runtime_get_sync(vdev->drm.dev); - down_write(&vdev->pm->reset_lock); - ivpu_prepare_for_reset(vdev); - ivpu_hw_reset(vdev); - ivpu_pm_prepare_cold_boot(vdev); - ivpu_jobs_abort_all(vdev); - ivpu_ms_cleanup_all(vdev); + ivpu_pm_reset_begin(vdev); + + if (!pm_runtime_status_suspended(vdev->drm.dev)) { + ivpu_prepare_for_reset(vdev); + ivpu_hw_reset(vdev); + } ivpu_dbg(vdev, PM, "Pre-reset done.\n"); } @@ -342,18 +358,12 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev) void ivpu_pm_reset_done_cb(struct pci_dev *pdev) { struct ivpu_device *vdev = pci_get_drvdata(pdev); - int ret; ivpu_dbg(vdev, PM, "Post-reset..\n"); - ret = ivpu_resume(vdev); - if (ret) - ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret); - up_write(&vdev->pm->reset_lock); - atomic_set(&vdev->pm->reset_pending, 0); - ivpu_dbg(vdev, PM, "Post-reset done.\n"); - pm_runtime_mark_last_busy(vdev->drm.dev); - pm_runtime_put_autosuspend(vdev->drm.dev); + ivpu_pm_reset_complete(vdev); + + ivpu_dbg(vdev, PM, "Post-reset done.\n"); } void ivpu_pm_init(struct ivpu_device *vdev) diff --git a/drivers/acpi/prmt.c b/drivers/acpi/prmt.c index 747f83f7114d..e549914a636c 100644 --- a/drivers/acpi/prmt.c +++ b/drivers/acpi/prmt.c @@ -287,9 +287,7 @@ static acpi_status acpi_platformrt_space_handler(u32 function, if (!handler || !module) goto invalid_guid; - if (!handler->handler_addr || - !handler->static_data_buffer_addr || - !handler->acpi_param_buffer_addr) { + if (!handler->handler_addr) { buffer->prm_status = PRM_HANDLER_ERROR; return AE_OK; } diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 98d93ed58315..436019d96027 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -1187,8 +1187,6 @@ static int acpi_data_prop_read(const struct acpi_device_data *data, } break; } - if (nval == 0) - return -EINVAL; if (obj->type == ACPI_TYPE_BUFFER) { if (proptype != DEV_PROP_U8) @@ -1212,9 +1210,11 @@ static int acpi_data_prop_read(const struct acpi_device_data *data, ret = acpi_copy_property_array_uint(items, (u64 *)val, nval); break; case DEV_PROP_STRING: - ret = acpi_copy_property_array_string( - items, (char **)val, - min_t(u32, nval, obj->package.count)); + nval = min_t(u32, nval, obj->package.count); + if (nval == 0) + return -ENODATA; + + ret = acpi_copy_property_array_string(items, (char **)val, nval); break; default: ret = -EINVAL; diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 90aaec923889..b4cd14e7fa76 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -563,6 +563,12 @@ static const struct dmi_system_id irq1_edge_low_force_override[] = { DMI_MATCH(DMI_BOARD_NAME, "RP-15"), }, }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Eluktronics Inc."), + DMI_MATCH(DMI_BOARD_NAME, "MECH-17"), + }, + }, { /* TongFang GM6XGxX/TUXEDO Stellaris 16 Gen5 AMD */ .matches = { diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index d497d448e4b2..40e1d8d8a589 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1191,24 +1191,18 @@ static pm_message_t resume_event(pm_message_t sleep_state) return PMSG_ON; } -static void dpm_superior_set_must_resume(struct device *dev, bool set_active) +static void dpm_superior_set_must_resume(struct device *dev) { struct device_link *link; int idx; - if (dev->parent) { + if (dev->parent) dev->parent->power.must_resume = true; - if (set_active) - dev->parent->power.set_active = true; - } idx = device_links_read_lock(); - list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) { + list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) link->supplier->power.must_resume = true; - if (set_active) - link->supplier->power.set_active = true; - } device_links_read_unlock(idx); } @@ -1287,9 +1281,12 @@ static int device_suspend_noirq(struct device *dev, pm_message_t state, bool asy dev->power.must_resume = true; if (dev->power.must_resume) { - dev->power.set_active = dev->power.set_active || - dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND); - dpm_superior_set_must_resume(dev, dev->power.set_active); + if (dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND)) { + dev->power.set_active = true; + if (dev->parent && !dev->parent->power.ignore_children) + dev->parent->power.set_active = true; + } + dpm_superior_set_must_resume(dev); } Complete: diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 33b3bc99d532..282f81616a78 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -1127,8 +1127,8 @@ static void vdc_queue_drain(struct vdc_port *port) spin_lock_irq(&port->vio.lock); port->drain = 0; - blk_mq_unquiesce_queue(q, memflags); - blk_mq_unfreeze_queue(q); + blk_mq_unquiesce_queue(q); + blk_mq_unfreeze_queue(q, memflags); } static void vdc_ldc_reset_timer_work(struct work_struct *work) diff --git a/drivers/bus/moxtet.c b/drivers/bus/moxtet.c index 6276551d7968..1e57ebfb7622 100644 --- a/drivers/bus/moxtet.c +++ b/drivers/bus/moxtet.c @@ -657,7 +657,7 @@ static void moxtet_irq_print_chip(struct irq_data *d, struct seq_file *p) id = moxtet->modules[pos->idx]; - seq_printf(p, " moxtet-%s.%i#%i", mox_module_name(id), pos->idx, + seq_printf(p, "moxtet-%s.%i#%i", mox_module_name(id), pos->idx, pos->bit); } diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 0ee5c691fb36..9e46960f6a86 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -17,7 +17,8 @@ config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM config ARM_AIROHA_SOC_CPUFREQ tristate "Airoha EN7581 SoC CPUFreq support" - depends on (ARCH_AIROHA && OF) || COMPILE_TEST + depends on ARCH_AIROHA || COMPILE_TEST + depends on OF select PM_OPP default ARCH_AIROHA help diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index dd9b8d6993d6..313550fa62d4 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -699,7 +699,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu, if (min_perf < lowest_nonlinear_perf) min_perf = lowest_nonlinear_perf; - max_perf = cap_perf; + max_perf = cpudata->max_limit_perf; if (max_perf < min_perf) max_perf = min_perf; @@ -747,7 +747,6 @@ static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) guard(mutex)(&amd_pstate_driver_lock); ret = amd_pstate_cpu_boost_update(policy, state); - policy->boost_enabled = !ret ? state : false; refresh_frequency_limits(policy); return ret; @@ -822,25 +821,28 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) static void amd_pstate_update_limits(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct cpufreq_policy *policy = NULL; struct amd_cpudata *cpudata; u32 prev_high = 0, cur_high = 0; int ret; bool highest_perf_changed = false; + if (!amd_pstate_prefcore) + return; + + policy = cpufreq_cpu_get(cpu); if (!policy) return; cpudata = policy->driver_data; - if (!amd_pstate_prefcore) - return; - guard(mutex)(&amd_pstate_driver_lock); ret = amd_get_highest_perf(cpu, &cur_high); - if (ret) - goto free_cpufreq_put; + if (ret) { + cpufreq_cpu_put(policy); + return; + } prev_high = READ_ONCE(cpudata->prefcore_ranking); highest_perf_changed = (prev_high != cur_high); @@ -850,8 +852,6 @@ static void amd_pstate_update_limits(unsigned int cpu) if (cur_high < CPPC_MAX_PERF) sched_set_itmt_core_prio((int)cur_high, cpu); } - -free_cpufreq_put: cpufreq_cpu_put(policy); if (!highest_perf_changed) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e0048856ecee..30ffbddc7ece 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1571,7 +1571,8 @@ static int cpufreq_online(unsigned int cpu) policy->cdev = of_cpufreq_cooling_register(policy); /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */ - if (policy->boost_enabled != cpufreq_boost_enabled()) { + if (cpufreq_driver->set_boost && + policy->boost_enabled != cpufreq_boost_enabled()) { policy->boost_enabled = cpufreq_boost_enabled(); ret = cpufreq_driver->set_boost(policy, policy->boost_enabled); if (ret) { diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 71d8b26c4103..9f35f69e0f9e 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -106,7 +106,7 @@ config ISCSI_IBFT select ISCSI_BOOT_SYSFS select ISCSI_IBFT_FIND if X86 depends on ACPI && SCSI && SCSI_LOWLEVEL - default n + default n help This option enables support for detection and exposing of iSCSI Boot Firmware Table (iBFT) via sysfs to userspace. If you wish to diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c index 6e9788324fea..371f24569b3b 100644 --- a/drivers/firmware/iscsi_ibft.c +++ b/drivers/firmware/iscsi_ibft.c @@ -310,7 +310,10 @@ static ssize_t ibft_attr_show_nic(void *data, int type, char *buf) str += sprintf_ipaddr(str, nic->ip_addr); break; case ISCSI_BOOT_ETH_SUBNET_MASK: - val = cpu_to_be32(~((1 << (32-nic->subnet_mask_prefix))-1)); + if (nic->subnet_mask_prefix > 32) + val = cpu_to_be32(~0); + else + val = cpu_to_be32(~((1 << (32-nic->subnet_mask_prefix))-1)); str += sprintf(str, "%pI4", &val); break; case ISCSI_BOOT_ETH_PREFIX_LEN: diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index add5ad29a673..98b4d1633b25 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -338,6 +338,7 @@ config GPIO_GRANITERAPIDS config GPIO_GRGPIO tristate "Aeroflex Gaisler GRGPIO support" + depends on OF || COMPILE_TEST select GPIO_GENERIC select IRQ_DOMAIN help diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index be4c9981ebc4..d63c1030e6ac 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -841,25 +841,6 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, unsigned long *pendin DECLARE_BITMAP(trigger, MAX_LINE); int ret; - if (chip->driver_data & PCA_PCAL) { - /* Read the current interrupt status from the device */ - ret = pca953x_read_regs(chip, PCAL953X_INT_STAT, trigger); - if (ret) - return false; - - /* Check latched inputs and clear interrupt status */ - ret = pca953x_read_regs(chip, chip->regs->input, cur_stat); - if (ret) - return false; - - /* Apply filter for rising/falling edge selection */ - bitmap_replace(new_stat, chip->irq_trig_fall, chip->irq_trig_raise, cur_stat, gc->ngpio); - - bitmap_and(pending, new_stat, trigger, gc->ngpio); - - return !bitmap_empty(pending, gc->ngpio); - } - ret = pca953x_read_regs(chip, chip->regs->input, cur_stat); if (ret) return false; diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c index a086087ada17..b6c230fab840 100644 --- a/drivers/gpio/gpio-sim.c +++ b/drivers/gpio/gpio-sim.c @@ -1028,20 +1028,23 @@ gpio_sim_device_lockup_configfs(struct gpio_sim_device *dev, bool lock) struct configfs_subsystem *subsys = dev->group.cg_subsys; struct gpio_sim_bank *bank; struct gpio_sim_line *line; + struct config_item *item; /* - * The device only needs to depend on leaf line entries. This is + * The device only needs to depend on leaf entries. This is * sufficient to lock up all the configfs entries that the * instantiated, alive device depends on. */ list_for_each_entry(bank, &dev->bank_list, siblings) { list_for_each_entry(line, &bank->line_list, siblings) { + item = line->hog ? &line->hog->item + : &line->group.cg_item; + if (lock) - WARN_ON(configfs_depend_item_unlocked( - subsys, &line->group.cg_item)); + WARN_ON(configfs_depend_item_unlocked(subsys, + item)); else - configfs_undepend_item_unlocked( - &line->group.cg_item); + configfs_undepend_item_unlocked(item); } } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 817116e53d44..dce9323fb410 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -119,9 +119,10 @@ * - 3.57.0 - Compute tunneling on GFX10+ * - 3.58.0 - Add GFX12 DCC support * - 3.59.0 - Cleared VRAM + * - 3.60.0 - Add AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE (Vulkan requirement) */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 59 +#define KMS_DRIVER_MINOR 60 #define KMS_DRIVER_PATCHLEVEL 0 /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index ff286940ab43..01ae2f88dec8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -309,7 +309,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, mutex_lock(&adev->mman.gtt_window_lock); while (src_mm.remaining) { uint64_t from, to, cur_size, tiling_flags; - uint32_t num_type, data_format, max_com; + uint32_t num_type, data_format, max_com, write_compress_disable; struct dma_fence *next; /* Never copy more than 256MiB at once to avoid a timeout */ @@ -340,9 +340,13 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, max_com = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK); num_type = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_NUMBER_TYPE); data_format = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_DATA_FORMAT); + write_compress_disable = + AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_WRITE_COMPRESS_DISABLE); copy_flags |= (AMDGPU_COPY_FLAGS_SET(MAX_COMPRESSED, max_com) | AMDGPU_COPY_FLAGS_SET(NUMBER_TYPE, num_type) | - AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, data_format)); + AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, data_format) | + AMDGPU_COPY_FLAGS_SET(WRITE_COMPRESS_DISABLE, + write_compress_disable)); } r = amdgpu_copy_buffer(ring, from, to, cur_size, resv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 461fb8090ae0..208b7d1d8a27 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -119,6 +119,8 @@ struct amdgpu_copy_mem { #define AMDGPU_COPY_FLAGS_NUMBER_TYPE_MASK 0x07 #define AMDGPU_COPY_FLAGS_DATA_FORMAT_SHIFT 8 #define AMDGPU_COPY_FLAGS_DATA_FORMAT_MASK 0x3f +#define AMDGPU_COPY_FLAGS_WRITE_COMPRESS_DISABLE_SHIFT 14 +#define AMDGPU_COPY_FLAGS_WRITE_COMPRESS_DISABLE_MASK 0x1 #define AMDGPU_COPY_FLAGS_SET(field, value) \ (((__u32)(value) & AMDGPU_COPY_FLAGS_##field##_MASK) << AMDGPU_COPY_FLAGS_##field##_SHIFT) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 9c17df2cf37b..7e10e94624e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1741,11 +1741,12 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, uint32_t byte_count, uint32_t copy_flags) { - uint32_t num_type, data_format, max_com; + uint32_t num_type, data_format, max_com, write_cm; max_com = AMDGPU_COPY_FLAGS_GET(copy_flags, MAX_COMPRESSED); data_format = AMDGPU_COPY_FLAGS_GET(copy_flags, DATA_FORMAT); num_type = AMDGPU_COPY_FLAGS_GET(copy_flags, NUMBER_TYPE); + write_cm = AMDGPU_COPY_FLAGS_GET(copy_flags, WRITE_COMPRESS_DISABLE) ? 2 : 1; ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | @@ -1762,7 +1763,7 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, if ((copy_flags & (AMDGPU_COPY_FLAGS_READ_DECOMPRESSED | AMDGPU_COPY_FLAGS_WRITE_COMPRESSED))) ib->ptr[ib->length_dw++] = SDMA_DCC_DATA_FORMAT(data_format) | SDMA_DCC_NUM_TYPE(num_type) | ((copy_flags & AMDGPU_COPY_FLAGS_READ_DECOMPRESSED) ? SDMA_DCC_READ_CM(2) : 0) | - ((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ? SDMA_DCC_WRITE_CM(1) : 0) | + ((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ? SDMA_DCC_WRITE_CM(write_cm) : 0) | SDMA_DCC_MAX_COM(max_com) | SDMA_DCC_MAX_UCOM(1); else ib->ptr[ib->length_dw++] = 0; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index cecaadf741ad..f84e795e35f5 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2133,7 +2133,7 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c dc_enable_stereo(dc, context, dc_streams, context->stream_count); - if (context->stream_count > get_seamless_boot_stream_count(context) || + if (get_seamless_boot_stream_count(context) == 0 || context->stream_count == 0) { /* Must wait for no flips to be pending before doing optimize bw */ hwss_wait_for_no_pipes_pending(dc, context); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c index 5bb8b78bf250..bf636b28e3e1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c @@ -63,8 +63,7 @@ void dmub_hw_lock_mgr_inbox0_cmd(struct dc_dmub_srv *dmub_srv, bool should_use_dmub_lock(struct dc_link *link) { - if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1 || - link->psr_settings.psr_version == DC_PSR_VERSION_1) + if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) return true; if (link->replay_settings.replay_feature_enabled) diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 46f9c05de16e..e1d500633dfa 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -29,11 +29,15 @@ dml_ccflags := $(CC_FLAGS_FPU) dml_rcflags := $(CC_FLAGS_NO_FPU) ifneq ($(CONFIG_FRAME_WARN),0) -ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) -frame_warn_flag := -Wframe-larger-than=3072 -else -frame_warn_flag := -Wframe-larger-than=2048 -endif + ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) + frame_warn_limit := 3072 + else + frame_warn_limit := 2048 + endif + + ifeq ($(call test-lt, $(CONFIG_FRAME_WARN), $(frame_warn_limit)),y) + frame_warn_flag := -Wframe-larger-than=$(frame_warn_limit) + endif endif CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index 91c4f3b4bd5f..21fd466dba26 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -28,15 +28,19 @@ dml2_ccflags := $(CC_FLAGS_FPU) dml2_rcflags := $(CC_FLAGS_NO_FPU) ifneq ($(CONFIG_FRAME_WARN),0) -ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) -ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_COMPILE_TEST),yy) -frame_warn_flag := -Wframe-larger-than=4096 -else -frame_warn_flag := -Wframe-larger-than=3072 -endif -else -frame_warn_flag := -Wframe-larger-than=2048 -endif + ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) + ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_COMPILE_TEST),yy) + frame_warn_limit := 4096 + else + frame_warn_limit := 3072 + endif + else + frame_warn_limit := 2048 + endif + + ifeq ($(call test-lt, $(CONFIG_FRAME_WARN), $(frame_warn_limit)),y) + frame_warn_flag := -Wframe-larger-than=$(frame_warn_limit) + endif endif subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2 diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index b9c6b45f6872..0c8ec30ea672 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -1017,7 +1017,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s if (disp_cfg_stream_location < 0) disp_cfg_stream_location = dml_dispcfg->num_streams++; - ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); + ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml21_timing_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, context->streams[stream_index], dml_ctx); adjust_dml21_hblank_timing_config_from_pipe_ctx(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, &context->res_ctx.pipe_ctx[stream_index]); populate_dml21_output_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].output, context->streams[stream_index], &context->res_ctx.pipe_ctx[stream_index]); @@ -1042,7 +1042,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s if (disp_cfg_plane_location < 0) disp_cfg_plane_location = dml_dispcfg->num_planes++; - ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); + ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml21_surface_config_from_plane_state(in_dc, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location].surface, context->stream_status[stream_index].plane_states[plane_index]); populate_dml21_plane_config_from_plane_state(dml_ctx, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location], context->stream_status[stream_index].plane_states[plane_index], context, stream_index); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index b416320873e1..b8a34abaf519 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -786,7 +786,7 @@ static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st * case SIGNAL_TYPE_DISPLAY_PORT_MST: case SIGNAL_TYPE_DISPLAY_PORT: out->OutputEncoder[location] = dml_dp; - if (dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location] != -1) + if (location < MAX_HPO_DP2_ENCODERS && dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location] != -1) out->OutputEncoder[dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location]] = dml_dp2p0; break; case SIGNAL_TYPE_EDP: @@ -1343,7 +1343,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat if (disp_cfg_stream_location < 0) disp_cfg_stream_location = dml_dispcfg->num_timings++; - ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); + ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_stream_location, context->streams[i]); populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], current_pipe_context, dml2); @@ -1383,7 +1383,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat if (disp_cfg_plane_location < 0) disp_cfg_plane_location = dml_dispcfg->num_surfaces++; - ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); + ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml_surface_cfg_from_plane_state(dml2->v20.dml_core_ctx.project, &dml_dispcfg->surface, disp_cfg_plane_location, context->stream_status[i].plane_states[j]); populate_dml_plane_cfg_from_plane_state( diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c index fe741100c0f8..d347bb06577a 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c @@ -129,7 +129,8 @@ bool hubbub3_program_watermarks( REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF); - hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); + if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter) + hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); return wm_pending; } diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c index 7fb5523f9722..b98505b240a7 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c @@ -750,7 +750,8 @@ static bool hubbub31_program_watermarks( REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF);*/ - hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); + if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter) + hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); return wm_pending; } diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c index 5264dc26cce1..32a6be543105 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c @@ -786,7 +786,8 @@ static bool hubbub32_program_watermarks( REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF);*/ - hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); + if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter) + hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); hubbub32_force_usr_retraining_allow(hubbub, hubbub->ctx->dc->debug.force_usr_allow); diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c index 5eb3da8d5206..dce7269959ce 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c @@ -326,7 +326,8 @@ static bool hubbub35_program_watermarks( DCHUBBUB_ARB_MIN_REQ_OUTSTAND_COMMIT_THRESHOLD, 0xA);/*hw delta*/ REG_UPDATE(DCHUBBUB_ARB_HOSTVM_CNTL, DCHUBBUB_ARB_MAX_QOS_COMMIT_THRESHOLD, 0xF); - hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); + if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter) + hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); hubbub32_force_usr_retraining_allow(hubbub, hubbub->ctx->dc->debug.force_usr_allow); diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c index be0ac613675a..0da70b50e86d 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c @@ -500,6 +500,8 @@ void hubp3_init(struct hubp *hubp) //hubp[i].HUBPREQ_DEBUG.HUBPREQ_DEBUG[26] = 1; REG_WRITE(HUBPREQ_DEBUG, 1 << 26); + REG_UPDATE(DCHUBP_CNTL, HUBP_TTU_DISABLE, 0); + hubp_reset(hubp); } diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c index edd37898d550..f3a21c623f44 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c @@ -168,6 +168,8 @@ void hubp32_init(struct hubp *hubp) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); REG_WRITE(HUBPREQ_DEBUG_DB, 1 << 8); + + REG_UPDATE(DCHUBP_CNTL, HUBP_TTU_DISABLE, 0); } static struct hubp_funcs dcn32_hubp_funcs = { .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 623cde76debf..b907ad1acedd 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -236,7 +236,8 @@ void dcn35_init_hw(struct dc *dc) } hws->funcs.init_pipes(dc, dc->current_state); - if (dc->res_pool->hubbub->funcs->allow_self_refresh_control) + if (dc->res_pool->hubbub->funcs->allow_self_refresh_control && + !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter) dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub, !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter); } diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c b/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c index ebccb74306a7..f30b3d5eeca5 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c @@ -160,6 +160,10 @@ static int komeda_wb_connector_add(struct komeda_kms_dev *kms, formats = komeda_get_layer_fourcc_list(&mdev->fmt_tbl, kwb_conn->wb_layer->layer_type, &n_formats); + if (!formats) { + kfree(kwb_conn); + return -ENOMEM; + } err = drm_writeback_connector_init(&kms->base, wb_conn, &komeda_wb_connector_funcs, diff --git a/drivers/gpu/drm/ast/ast_dp.c b/drivers/gpu/drm/ast/ast_dp.c index 0e282b7b167c..b9eb67e3fa90 100644 --- a/drivers/gpu/drm/ast/ast_dp.c +++ b/drivers/gpu/drm/ast/ast_dp.c @@ -195,7 +195,7 @@ static bool __ast_dp_wait_enable(struct ast_device *ast, bool enabled) if (enabled) vgacrdf_test |= AST_IO_VGACRDF_DP_VIDEO_ENABLE; - for (i = 0; i < 200; ++i) { + for (i = 0; i < 1000; ++i) { if (i) mdelay(1); vgacrdf = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xdf, diff --git a/drivers/gpu/drm/display/drm_dp_cec.c b/drivers/gpu/drm/display/drm_dp_cec.c index 007ceb281d00..56a4965e518c 100644 --- a/drivers/gpu/drm/display/drm_dp_cec.c +++ b/drivers/gpu/drm/display/drm_dp_cec.c @@ -311,16 +311,6 @@ void drm_dp_cec_attach(struct drm_dp_aux *aux, u16 source_physical_address) if (!aux->transfer) return; -#ifndef CONFIG_MEDIA_CEC_RC - /* - * CEC_CAP_RC is part of CEC_CAP_DEFAULTS, but it is stripped by - * cec_allocate_adapter() if CONFIG_MEDIA_CEC_RC is undefined. - * - * Do this here as well to ensure the tests against cec_caps are - * correct. - */ - cec_caps &= ~CEC_CAP_RC; -#endif cancel_delayed_work_sync(&aux->cec.unregister_work); mutex_lock(&aux->cec.lock); @@ -337,7 +327,9 @@ void drm_dp_cec_attach(struct drm_dp_aux *aux, u16 source_physical_address) num_las = CEC_MAX_LOG_ADDRS; if (aux->cec.adap) { - if (aux->cec.adap->capabilities == cec_caps && + /* Check if the adapter properties have changed */ + if ((aux->cec.adap->capabilities & CEC_CAP_MONITOR_ALL) == + (cec_caps & CEC_CAP_MONITOR_ALL) && aux->cec.adap->available_log_addrs == num_las) { /* Unchanged, so just set the phys addr */ cec_s_phys_addr(aux->cec.adap, source_physical_address, false); diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c index fc1e517e074a..7e6ce905bdaf 100644 --- a/drivers/gpu/drm/i915/display/intel_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_backlight.c @@ -41,8 +41,9 @@ static u32 scale(u32 source_val, { u64 target_val; - WARN_ON(source_min > source_max); - WARN_ON(target_min > target_max); + if (WARN_ON(source_min >= source_max) || + WARN_ON(target_min > target_max)) + return target_min; /* defensive */ source_val = clamp(source_val, source_min, source_max); diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index f1f3b1bb1e89..aa77ddcee42c 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1791,7 +1791,7 @@ int intel_dp_dsc_max_src_input_bpc(struct intel_display *display) if (DISPLAY_VER(display) == 11) return 10; - return 0; + return intel_dp_dsc_min_src_input_bpc(); } int intel_dp_dsc_compute_max_bpp(const struct intel_connector *connector, @@ -2072,11 +2072,10 @@ icl_dsc_compute_link_config(struct intel_dp *intel_dp, /* Compressed BPP should be less than the Input DSC bpp */ dsc_max_bpp = min(dsc_max_bpp, pipe_bpp - 1); - for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp); i++) { - if (valid_dsc_bpp[i] < dsc_min_bpp) + for (i = ARRAY_SIZE(valid_dsc_bpp) - 1; i >= 0; i--) { + if (valid_dsc_bpp[i] < dsc_min_bpp || + valid_dsc_bpp[i] > dsc_max_bpp) continue; - if (valid_dsc_bpp[i] > dsc_max_bpp) - break; ret = dsc_compute_link_config(intel_dp, pipe_config, @@ -2829,7 +2828,6 @@ static void intel_dp_compute_as_sdp(struct intel_dp *intel_dp, crtc_state->infoframes.enable |= intel_hdmi_infoframe_enable(DP_SDP_ADAPTIVE_SYNC); - /* Currently only DP_AS_SDP_AVT_FIXED_VTOTAL mode supported */ as_sdp->sdp_type = DP_SDP_ADAPTIVE_SYNC; as_sdp->length = 0x9; as_sdp->duration_incr_ms = 0; @@ -2840,7 +2838,7 @@ static void intel_dp_compute_as_sdp(struct intel_dp *intel_dp, as_sdp->target_rr = drm_mode_vrefresh(adjusted_mode); as_sdp->target_rr_divider = true; } else { - as_sdp->mode = DP_AS_SDP_AVT_FIXED_VTOTAL; + as_sdp->mode = DP_AS_SDP_AVT_DYNAMIC_VTOTAL; as_sdp->vtotal = adjusted_mode->vtotal; as_sdp->target_rr = 0; } diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 0c44fc7dd86c..a65cf97ad12d 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -341,6 +341,10 @@ int intel_dp_mtp_tu_compute_config(struct intel_dp *intel_dp, break; } + + /* Allow using zero step to indicate one try */ + if (!step) + break; } if (slots < 0) { diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index 7464b44c8bb3..1bab7c34a794 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -41,7 +41,7 @@ intel_hdcp_adjust_hdcp_line_rekeying(struct intel_encoder *encoder, u32 rekey_bit = 0; /* Here we assume HDMI is in TMDS mode of operation */ - if (encoder->type != INTEL_OUTPUT_HDMI) + if (!intel_encoder_is_hdmi(encoder)) return; if (DISPLAY_VER(display) >= 30) { @@ -2188,6 +2188,19 @@ static int intel_hdcp2_check_link(struct intel_connector *connector) drm_dbg_kms(display->drm, "HDCP2.2 Downstream topology change\n"); + + ret = hdcp2_authenticate_repeater_topology(connector); + if (!ret) { + intel_hdcp_update_value(connector, + DRM_MODE_CONTENT_PROTECTION_ENABLED, + true); + goto out; + } + + drm_dbg_kms(display->drm, + "[CONNECTOR:%d:%s] Repeater topology auth failed.(%d)\n", + connector->base.base.id, connector->base.name, + ret); } else { drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s] HDCP2.2 link failed, retrying auth\n", diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index ff9764cac1e7..80e558042d97 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -106,8 +106,6 @@ static const u32 icl_sdr_y_plane_formats[] = { DRM_FORMAT_Y216, DRM_FORMAT_XYUV8888, DRM_FORMAT_XVYU2101010, - DRM_FORMAT_XVYU12_16161616, - DRM_FORMAT_XVYU16161616, }; static const u32 icl_sdr_uv_plane_formats[] = { @@ -134,8 +132,6 @@ static const u32 icl_sdr_uv_plane_formats[] = { DRM_FORMAT_Y216, DRM_FORMAT_XYUV8888, DRM_FORMAT_XVYU2101010, - DRM_FORMAT_XVYU12_16161616, - DRM_FORMAT_XVYU16161616, }; static const u32 icl_hdr_plane_formats[] = { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index fe69f2c8527d..ae3343c81a64 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -209,8 +209,6 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) struct address_space *mapping = obj->base.filp->f_mapping; unsigned int max_segment = i915_sg_segment_size(i915->drm.dev); struct sg_table *st; - struct sgt_iter sgt_iter; - struct page *page; int ret; /* @@ -239,9 +237,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) * for PAGE_SIZE chunks instead may be helpful. */ if (max_segment > PAGE_SIZE) { - for_each_sgt_page(page, sgt_iter, st) - put_page(page); - sg_free_table(st); + shmem_sg_free_table(st, mapping, false, false); kfree(st); max_segment = PAGE_SIZE; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 12f1ba7ca9c1..cc05bd9e43b4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1469,6 +1469,19 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc) spin_unlock_irqrestore(&guc->timestamp.lock, flags); } +static void __update_guc_busyness_running_state(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned long flags; + + spin_lock_irqsave(&guc->timestamp.lock, flags); + for_each_engine(engine, gt, id) + engine->stats.guc.running = false; + spin_unlock_irqrestore(&guc->timestamp.lock, flags); +} + static void __update_guc_busyness_stats(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); @@ -1619,6 +1632,9 @@ void intel_guc_busyness_park(struct intel_gt *gt) if (!guc_submission_initialized(guc)) return; + /* Assume no engines are running and set running state to false */ + __update_guc_busyness_running_state(guc); + /* * There is a race with suspend flow where the worker runs after suspend * and causes an unclaimed register access warning. Cancel the worker @@ -5519,12 +5535,20 @@ static inline void guc_log_context(struct drm_printer *p, { drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); - drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", - ce->ring->head, - ce->lrc_reg_state[CTX_RING_HEAD]); - drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", - ce->ring->tail, - ce->lrc_reg_state[CTX_RING_TAIL]); + if (intel_context_pin_if_active(ce)) { + drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", + ce->ring->head, + ce->lrc_reg_state[CTX_RING_HEAD]); + drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", + ce->ring->tail, + ce->lrc_reg_state[CTX_RING_TAIL]); + intel_context_unpin(ce); + } else { + drm_printf(p, "\t\tLRC Head: Internal %u, Memory not pinned\n", + ce->ring->head); + drm_printf(p, "\t\tLRC Tail: Internal %u, Memory not pinned\n", + ce->ring->tail); + } drm_printf(p, "\t\tContext Pin Count: %u\n", atomic_read(&ce->pin_count)); drm_printf(p, "\t\tGuC ID Ref Count: %u\n", diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h index a49561e9f3c3..a79ad2da070c 100644 --- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h @@ -51,6 +51,10 @@ /* Common to all OA units */ #define OA_OACONTROL_REPORT_BC_MASK REG_GENMASK(9, 9) #define OA_OACONTROL_COUNTER_SIZE_MASK REG_GENMASK(8, 8) +#define OAG_OACONTROL_USED_BITS \ + (OAG_OACONTROL_OA_PES_DISAG_EN | OAG_OACONTROL_OA_CCS_SELECT_MASK | \ + OAG_OACONTROL_OA_COUNTER_SEL_MASK | OAG_OACONTROL_OA_COUNTER_ENABLE | \ + OA_OACONTROL_REPORT_BC_MASK | OA_OACONTROL_COUNTER_SIZE_MASK) #define OAG_OA_DEBUG XE_REG(0xdaf8, XE_REG_OPTION_MASKED) #define OAG_OA_DEBUG_DISABLE_MMIO_TRG REG_BIT(14) @@ -78,6 +82,8 @@ #define OAM_CONTEXT_CONTROL_OFFSET (0x1bc) #define OAM_CONTROL_OFFSET (0x194) #define OAM_CONTROL_COUNTER_SEL_MASK REG_GENMASK(3, 1) +#define OAM_OACONTROL_USED_BITS \ + (OAM_CONTROL_COUNTER_SEL_MASK | OAG_OACONTROL_OA_COUNTER_ENABLE) #define OAM_DEBUG_OFFSET (0x198) #define OAM_STATUS_OFFSET (0x19c) #define OAM_MMIO_TRG_OFFSET (0x1d0) diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 81dc7795c065..39fe485d2085 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -119,11 +119,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count, drm_puts(&p, "\n**** GuC CT ****\n"); xe_guc_ct_snapshot_print(ss->guc.ct, &p); - /* - * Don't add a new section header here because the mesa debug decoder - * tool expects the context information to be in the 'GuC CT' section. - */ - /* drm_puts(&p, "\n**** Contexts ****\n"); */ + drm_puts(&p, "\n**** Contexts ****\n"); xe_guc_exec_queue_snapshot_print(ss->ge, &p); drm_puts(&p, "\n**** Job ****\n"); @@ -395,42 +391,34 @@ int xe_devcoredump_init(struct xe_device *xe) /** * xe_print_blob_ascii85 - print a BLOB to some useful location in ASCII85 * - * The output is split to multiple lines because some print targets, e.g. dmesg - * cannot handle arbitrarily long lines. Note also that printing to dmesg in - * piece-meal fashion is not possible, each separate call to drm_puts() has a - * line-feed automatically added! Therefore, the entire output line must be - * constructed in a local buffer first, then printed in one atomic output call. + * The output is split into multiple calls to drm_puts() because some print + * targets, e.g. dmesg, cannot handle arbitrarily long lines. These targets may + * add newlines, as is the case with dmesg: each drm_puts() call creates a + * separate line. * * There is also a scheduler yield call to prevent the 'task has been stuck for * 120s' kernel hang check feature from firing when printing to a slow target * such as dmesg over a serial port. * - * TODO: Add compression prior to the ASCII85 encoding to shrink huge buffers down. - * * @p: the printer object to output to * @prefix: optional prefix to add to output string + * @suffix: optional suffix to add at the end. 0 disables it and is + * not added to the output, which is useful when using multiple calls + * to dump data to @p * @blob: the Binary Large OBject to dump out * @offset: offset in bytes to skip from the front of the BLOB, must be a multiple of sizeof(u32) * @size: the size in bytes of the BLOB, must be a multiple of sizeof(u32) */ -void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, +void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffix, const void *blob, size_t offset, size_t size) { const u32 *blob32 = (const u32 *)blob; char buff[ASCII85_BUFSZ], *line_buff; size_t line_pos = 0; - /* - * Splitting blobs across multiple lines is not compatible with the mesa - * debug decoder tool. Note that even dropping the explicit '\n' below - * doesn't help because the GuC log is so big some underlying implementation - * still splits the lines at 512K characters. So just bail completely for - * the moment. - */ - return; - #define DMESG_MAX_LINE_LEN 800 -#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "\n\0" */ + /* Always leave space for the suffix char and the \0 */ +#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "<suffix>\0" */ if (size & 3) drm_printf(p, "Size not word aligned: %zu", size); @@ -462,7 +450,6 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, line_pos += strlen(line_buff + line_pos); if ((line_pos + MIN_SPACE) >= DMESG_MAX_LINE_LEN) { - line_buff[line_pos++] = '\n'; line_buff[line_pos++] = 0; drm_puts(p, line_buff); @@ -474,10 +461,11 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, } } + if (suffix) + line_buff[line_pos++] = suffix; + if (line_pos) { - line_buff[line_pos++] = '\n'; line_buff[line_pos++] = 0; - drm_puts(p, line_buff); } diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h index 6a17e6d60102..5391a80a4d1b 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.h +++ b/drivers/gpu/drm/xe/xe_devcoredump.h @@ -29,7 +29,7 @@ static inline int xe_devcoredump_init(struct xe_device *xe) } #endif -void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, +void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffix, const void *blob, size_t offset, size_t size); #endif diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 26e64530ada2..5d6fb79957b6 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -532,8 +532,10 @@ static int all_fw_domain_init(struct xe_gt *gt) if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) xe_lmtt_init_hw(>_to_tile(gt)->sriov.pf.lmtt); - if (IS_SRIOV_PF(gt_to_xe(gt))) + if (IS_SRIOV_PF(gt_to_xe(gt))) { + xe_gt_sriov_pf_init(gt); xe_gt_sriov_pf_init_hw(gt); + } xe_force_wake_put(gt_to_fw(gt), fw_ref); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index e71fc3d2bda2..6f906c8e8108 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -68,6 +68,19 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt) return 0; } +/** + * xe_gt_sriov_pf_init - Prepare SR-IOV PF data structures on PF. + * @gt: the &xe_gt to initialize + * + * Late one-time initialization of the PF data. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_init(struct xe_gt *gt) +{ + return xe_gt_sriov_pf_migration_init(gt); +} + static bool pf_needs_enable_ggtt_guest_update(struct xe_device *xe) { return GRAPHICS_VERx100(xe) == 1200; @@ -90,7 +103,6 @@ void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) pf_enable_ggtt_guest_update(gt); xe_gt_sriov_pf_service_update(gt); - xe_gt_sriov_pf_migration_init(gt); } static u32 pf_get_vf_regs_stride(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h index 96fab779a906..f474509411c0 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h @@ -10,6 +10,7 @@ struct xe_gt; #ifdef CONFIG_PCI_IOV int xe_gt_sriov_pf_init_early(struct xe_gt *gt); +int xe_gt_sriov_pf_init(struct xe_gt *gt); void xe_gt_sriov_pf_init_hw(struct xe_gt *gt); void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid); void xe_gt_sriov_pf_restart(struct xe_gt *gt); @@ -19,6 +20,11 @@ static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt) return 0; } +static inline int xe_gt_sriov_pf_init(struct xe_gt *gt) +{ + return 0; +} + static inline void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) { } diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 8b65c5e959cc..50c8076b5158 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1724,7 +1724,8 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, snapshot->g2h_outstanding); if (snapshot->ctb) - xe_print_blob_ascii85(p, "CTB data", snapshot->ctb, 0, snapshot->ctb_size); + xe_print_blob_ascii85(p, "CTB data", '\n', + snapshot->ctb, 0, snapshot->ctb_size); } else { drm_puts(p, "CT disabled\n"); } diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index df4cfb698cdb..2baa4d95571f 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -211,8 +211,10 @@ void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_ remain = snapshot->size; for (i = 0; i < snapshot->num_chunks; i++) { size_t size = min(GUC_LOG_CHUNK_SIZE, remain); + const char *prefix = i ? NULL : "Log data"; + char suffix = i == snapshot->num_chunks - 1 ? '\n' : 0; - xe_print_blob_ascii85(p, i ? NULL : "Log data", snapshot->copy[i], 0, size); + xe_print_blob_ascii85(p, prefix, suffix, snapshot->copy[i], 0, size); remain -= size; } } diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index eeb96b5f49e2..fa873f3d0a9d 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -237,7 +237,6 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) u32 tail, hw_tail, partial_report_size, available; int report_size = stream->oa_buffer.format->size; unsigned long flags; - bool pollin; spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); @@ -282,11 +281,11 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) stream->oa_buffer.tail = tail; available = xe_oa_circ_diff(stream, stream->oa_buffer.tail, stream->oa_buffer.head); - pollin = available >= stream->wait_num_reports * report_size; + stream->pollin = available >= stream->wait_num_reports * report_size; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); - return pollin; + return stream->pollin; } static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer) @@ -294,10 +293,8 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer) struct xe_oa_stream *stream = container_of(hrtimer, typeof(*stream), poll_check_timer); - if (xe_oa_buffer_check_unlocked(stream)) { - stream->pollin = true; + if (xe_oa_buffer_check_unlocked(stream)) wake_up(&stream->poll_wq); - } hrtimer_forward_now(hrtimer, ns_to_ktime(stream->poll_period_ns)); @@ -452,6 +449,12 @@ static u32 __oa_ccs_select(struct xe_oa_stream *stream) return val; } +static u32 __oactrl_used_bits(struct xe_oa_stream *stream) +{ + return stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG ? + OAG_OACONTROL_USED_BITS : OAM_OACONTROL_USED_BITS; +} + static void xe_oa_enable(struct xe_oa_stream *stream) { const struct xe_oa_format *format = stream->oa_buffer.format; @@ -472,14 +475,14 @@ static void xe_oa_enable(struct xe_oa_stream *stream) stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG) val |= OAG_OACONTROL_OA_PES_DISAG_EN; - xe_mmio_write32(&stream->gt->mmio, regs->oa_ctrl, val); + xe_mmio_rmw32(&stream->gt->mmio, regs->oa_ctrl, __oactrl_used_bits(stream), val); } static void xe_oa_disable(struct xe_oa_stream *stream) { struct xe_mmio *mmio = &stream->gt->mmio; - xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctrl, 0); + xe_mmio_rmw32(mmio, __oa_regs(stream)->oa_ctrl, __oactrl_used_bits(stream), 0); if (xe_mmio_wait32(mmio, __oa_regs(stream)->oa_ctrl, OAG_OACONTROL_OA_COUNTER_ENABLE, 0, 50000, NULL, false)) drm_err(&stream->oa->xe->drm, @@ -2534,6 +2537,8 @@ static void __xe_oa_init_oa_units(struct xe_gt *gt) u->type = DRM_XE_OA_UNIT_TYPE_OAM; } + xe_mmio_write32(>->mmio, u->regs.oa_ctrl, 0); + /* Ensure MMIO trigger remains disabled till there is a stream */ xe_mmio_write32(>->mmio, u->regs.oa_debug, oag_configure_mmio_trigger(NULL, false)); diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 5546184df05f..35a221e2c11c 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -1300,12 +1300,14 @@ new_device_store(struct device *dev, struct device_attribute *attr, info.flags |= I2C_CLIENT_SLAVE; } - info.flags |= I2C_CLIENT_USER; - client = i2c_new_client_device(adap, &info); if (IS_ERR(client)) return PTR_ERR(client); + /* Keep track of the added device */ + mutex_lock(&adap->userspace_clients_lock); + list_add_tail(&client->detected, &adap->userspace_clients); + mutex_unlock(&adap->userspace_clients_lock); dev_info(dev, "%s: Instantiated device %s at 0x%02hx\n", "new_device", info.type, info.addr); @@ -1313,15 +1315,6 @@ new_device_store(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_WO(new_device); -static int __i2c_find_user_addr(struct device *dev, const void *addrp) -{ - struct i2c_client *client = i2c_verify_client(dev); - unsigned short addr = *(unsigned short *)addrp; - - return client && client->flags & I2C_CLIENT_USER && - i2c_encode_flags_to_addr(client) == addr; -} - /* * And of course let the users delete the devices they instantiated, if * they got it wrong. This interface can only be used to delete devices @@ -1336,7 +1329,7 @@ delete_device_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct i2c_adapter *adap = to_i2c_adapter(dev); - struct device *child_dev; + struct i2c_client *client, *next; unsigned short addr; char end; int res; @@ -1352,19 +1345,28 @@ delete_device_store(struct device *dev, struct device_attribute *attr, return -EINVAL; } - mutex_lock(&core_lock); /* Make sure the device was added through sysfs */ - child_dev = device_find_child(&adap->dev, &addr, __i2c_find_user_addr); - if (child_dev) { - i2c_unregister_device(i2c_verify_client(child_dev)); - put_device(child_dev); - } else { - dev_err(dev, "Can't find userspace-created device at %#x\n", addr); - count = -ENOENT; + res = -ENOENT; + mutex_lock_nested(&adap->userspace_clients_lock, + i2c_adapter_depth(adap)); + list_for_each_entry_safe(client, next, &adap->userspace_clients, + detected) { + if (i2c_encode_flags_to_addr(client) == addr) { + dev_info(dev, "%s: Deleting device %s at 0x%02hx\n", + "delete_device", client->name, client->addr); + + list_del(&client->detected); + i2c_unregister_device(client); + res = count; + break; + } } - mutex_unlock(&core_lock); + mutex_unlock(&adap->userspace_clients_lock); - return count; + if (res < 0) + dev_err(dev, "%s: Can't find device in list\n", + "delete_device"); + return res; } static DEVICE_ATTR_IGNORE_LOCKDEP(delete_device, S_IWUSR, NULL, delete_device_store); @@ -1535,6 +1537,8 @@ static int i2c_register_adapter(struct i2c_adapter *adap) adap->locked_flags = 0; rt_mutex_init(&adap->bus_lock); rt_mutex_init(&adap->mux_lock); + mutex_init(&adap->userspace_clients_lock); + INIT_LIST_HEAD(&adap->userspace_clients); /* Set default timeout to 1 second if not already set */ if (adap->timeout == 0) @@ -1700,6 +1704,23 @@ int i2c_add_numbered_adapter(struct i2c_adapter *adap) } EXPORT_SYMBOL_GPL(i2c_add_numbered_adapter); +static void i2c_do_del_adapter(struct i2c_driver *driver, + struct i2c_adapter *adapter) +{ + struct i2c_client *client, *_n; + + /* Remove the devices we created ourselves as the result of hardware + * probing (using a driver's detect method) */ + list_for_each_entry_safe(client, _n, &driver->clients, detected) { + if (client->adapter == adapter) { + dev_dbg(&adapter->dev, "Removing %s at 0x%x\n", + client->name, client->addr); + list_del(&client->detected); + i2c_unregister_device(client); + } + } +} + static int __unregister_client(struct device *dev, void *dummy) { struct i2c_client *client = i2c_verify_client(dev); @@ -1715,6 +1736,12 @@ static int __unregister_dummy(struct device *dev, void *dummy) return 0; } +static int __process_removed_adapter(struct device_driver *d, void *data) +{ + i2c_do_del_adapter(to_i2c_driver(d), data); + return 0; +} + /** * i2c_del_adapter - unregister I2C adapter * @adap: the adapter being unregistered @@ -1726,6 +1753,7 @@ static int __unregister_dummy(struct device *dev, void *dummy) void i2c_del_adapter(struct i2c_adapter *adap) { struct i2c_adapter *found; + struct i2c_client *client, *next; /* First make sure that this adapter was ever added */ mutex_lock(&core_lock); @@ -1737,16 +1765,31 @@ void i2c_del_adapter(struct i2c_adapter *adap) } i2c_acpi_remove_space_handler(adap); + /* Tell drivers about this removal */ + mutex_lock(&core_lock); + bus_for_each_drv(&i2c_bus_type, NULL, adap, + __process_removed_adapter); + mutex_unlock(&core_lock); + + /* Remove devices instantiated from sysfs */ + mutex_lock_nested(&adap->userspace_clients_lock, + i2c_adapter_depth(adap)); + list_for_each_entry_safe(client, next, &adap->userspace_clients, + detected) { + dev_dbg(&adap->dev, "Removing %s at 0x%x\n", client->name, + client->addr); + list_del(&client->detected); + i2c_unregister_device(client); + } + mutex_unlock(&adap->userspace_clients_lock); /* Detach any active clients. This can't fail, thus we do not * check the returned value. This is a two-pass process, because * we can't remove the dummy devices during the first pass: they * could have been instantiated by real devices wishing to clean * them up properly, so we give them a chance to do that first. */ - mutex_lock(&core_lock); device_for_each_child(&adap->dev, NULL, __unregister_client); device_for_each_child(&adap->dev, NULL, __unregister_dummy); - mutex_unlock(&core_lock); /* device name is gone after device_unregister */ dev_dbg(&adap->dev, "adapter [%s] unregistered\n", adap->name); @@ -1966,6 +2009,7 @@ int i2c_register_driver(struct module *owner, struct i2c_driver *driver) /* add the driver to the list of i2c drivers in the driver core */ driver->driver.owner = owner; driver->driver.bus = &i2c_bus_type; + INIT_LIST_HEAD(&driver->clients); /* When registration returns, the driver core * will have called probe() for all matching-but-unbound devices. @@ -1983,13 +2027,10 @@ int i2c_register_driver(struct module *owner, struct i2c_driver *driver) } EXPORT_SYMBOL(i2c_register_driver); -static int __i2c_unregister_detected_client(struct device *dev, void *argp) +static int __process_removed_driver(struct device *dev, void *data) { - struct i2c_client *client = i2c_verify_client(dev); - - if (client && client->flags & I2C_CLIENT_AUTO) - i2c_unregister_device(client); - + if (dev->type == &i2c_adapter_type) + i2c_do_del_adapter(data, to_i2c_adapter(dev)); return 0; } @@ -2000,12 +2041,7 @@ static int __i2c_unregister_detected_client(struct device *dev, void *argp) */ void i2c_del_driver(struct i2c_driver *driver) { - mutex_lock(&core_lock); - /* Satisfy __must_check, function can't fail */ - if (driver_for_each_device(&driver->driver, NULL, NULL, - __i2c_unregister_detected_client)) { - } - mutex_unlock(&core_lock); + i2c_for_each_dev(driver, __process_removed_driver); driver_unregister(&driver->driver); pr_debug("driver [%s] unregistered\n", driver->driver.name); @@ -2432,7 +2468,6 @@ static int i2c_detect_address(struct i2c_client *temp_client, /* Finally call the custom detection function */ memset(&info, 0, sizeof(struct i2c_board_info)); info.addr = addr; - info.flags = I2C_CLIENT_AUTO; err = driver->detect(temp_client, &info); if (err) { /* -ENODEV is returned if the detection fails. We catch it @@ -2459,7 +2494,9 @@ static int i2c_detect_address(struct i2c_client *temp_client, dev_dbg(&adapter->dev, "Creating %s at 0x%02x\n", info.type, info.addr); client = i2c_new_client_device(adapter, &info); - if (IS_ERR(client)) + if (!IS_ERR(client)) + list_add_tail(&client->detected, &driver->clients); + else dev_err(&adapter->dev, "Failed creating %s at 0x%02x\n", info.type, info.addr); } diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index be063bfb50c4..c11b9965c4ad 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -169,6 +169,7 @@ config IXP4XX_IRQ config LAN966X_OIC tristate "Microchip LAN966x OIC Support" + depends on MCHP_LAN966X_PCI || COMPILE_TEST select GENERIC_IRQ_CHIP select IRQ_DOMAIN help diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c index da5250f0155c..2b1684c60e3c 100644 --- a/drivers/irqchip/irq-apple-aic.c +++ b/drivers/irqchip/irq-apple-aic.c @@ -577,7 +577,8 @@ static void __exception_irq_entry aic_handle_fiq(struct pt_regs *regs) AIC_FIQ_HWIRQ(AIC_TMR_EL02_VIRT)); } - if (read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & PMCR0_IACT) { + if ((read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & (PMCR0_IMODE | PMCR0_IACT)) == + (FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_FIQ) | PMCR0_IACT)) { int irq; if (cpumask_test_cpu(smp_processor_id(), &aic_irqc->fiq_aff[AIC_CPU_PMU_P]->aff)) diff --git a/drivers/irqchip/irq-mvebu-icu.c b/drivers/irqchip/irq-mvebu-icu.c index b337f6c05f18..4eebed39880a 100644 --- a/drivers/irqchip/irq-mvebu-icu.c +++ b/drivers/irqchip/irq-mvebu-icu.c @@ -68,7 +68,8 @@ static int mvebu_icu_translate(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *hwirq, unsigned int *type) { unsigned int param_count = static_branch_unlikely(&legacy_bindings) ? 3 : 2; - struct mvebu_icu_msi_data *msi_data = d->host_data; + struct msi_domain_info *info = d->host_data; + struct mvebu_icu_msi_data *msi_data = info->chip_data; struct mvebu_icu *icu = msi_data->icu; /* Check the count of the parameters in dt */ diff --git a/drivers/irqchip/irq-partition-percpu.c b/drivers/irqchip/irq-partition-percpu.c index 8e76d2913e6b..4441ffe149ea 100644 --- a/drivers/irqchip/irq-partition-percpu.c +++ b/drivers/irqchip/irq-partition-percpu.c @@ -98,7 +98,7 @@ static void partition_irq_print_chip(struct irq_data *d, struct seq_file *p) struct irq_chip *chip = irq_desc_get_chip(part->chained_desc); struct irq_data *data = irq_desc_get_irq_data(part->chained_desc); - seq_printf(p, " %5s-%lu", chip->name, data->hwirq); + seq_printf(p, "%5s-%lu", chip->name, data->hwirq); } static struct irq_chip partition_irq_chip = { diff --git a/drivers/irqchip/irq-riscv-imsic-early.c b/drivers/irqchip/irq-riscv-imsic-early.c index c5c2e6929a2f..275df5005705 100644 --- a/drivers/irqchip/irq-riscv-imsic-early.c +++ b/drivers/irqchip/irq-riscv-imsic-early.c @@ -27,7 +27,7 @@ static void imsic_ipi_send(unsigned int cpu) { struct imsic_local_config *local = per_cpu_ptr(imsic->global.local, cpu); - writel_relaxed(IMSIC_IPI_ID, local->msi_va); + writel(IMSIC_IPI_ID, local->msi_va); } static void imsic_ipi_starting_cpu(void) diff --git a/drivers/irqchip/irq-thead-c900-aclint-sswi.c b/drivers/irqchip/irq-thead-c900-aclint-sswi.c index b0e366ade427..8ff6e7a1363b 100644 --- a/drivers/irqchip/irq-thead-c900-aclint-sswi.c +++ b/drivers/irqchip/irq-thead-c900-aclint-sswi.c @@ -31,7 +31,7 @@ static DEFINE_PER_CPU(void __iomem *, sswi_cpu_regs); static void thead_aclint_sswi_ipi_send(unsigned int cpu) { - writel_relaxed(0x1, per_cpu(sswi_cpu_regs, cpu)); + writel(0x1, per_cpu(sswi_cpu_regs, cpu)); } static void thead_aclint_sswi_ipi_clear(void) diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c index a382929ce7ba..369aed044b40 100644 --- a/drivers/md/md-linear.c +++ b/drivers/md/md-linear.c @@ -76,10 +76,8 @@ static int linear_set_limits(struct mddev *mddev) lim.max_write_zeroes_sectors = mddev->chunk_sectors; lim.io_min = mddev->chunk_sectors << 9; err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); - if (err) { - queue_limits_cancel_update(mddev->gendisk->queue); + if (err) return err; - } return queue_limits_set(mddev->gendisk->queue, &lim); } diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c index a9604ba3c805..3a64dc7a7e27 100644 --- a/drivers/message/fusion/mptscsih.c +++ b/drivers/message/fusion/mptscsih.c @@ -1843,65 +1843,6 @@ mptscsih_dev_reset(struct scsi_cmnd * SCpnt) return FAILED; } -/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/** - * mptscsih_target_reset - Perform a SCSI TARGET_RESET! - * @SCpnt: Pointer to scsi_cmnd structure, IO which reset is due to - * - * (linux scsi_host_template.eh_target_reset_handler routine) - * - * Returns SUCCESS or FAILED. - **/ -int -mptscsih_target_reset(struct scsi_cmnd * SCpnt) -{ - MPT_SCSI_HOST *hd; - int retval; - VirtDevice *vdevice; - MPT_ADAPTER *ioc; - - /* If we can't locate our host adapter structure, return FAILED status. - */ - if ((hd = shost_priv(SCpnt->device->host)) == NULL){ - printk(KERN_ERR MYNAM ": target reset: " - "Can't locate host! (sc=%p)\n", SCpnt); - return FAILED; - } - - ioc = hd->ioc; - printk(MYIOC_s_INFO_FMT "attempting target reset! (sc=%p)\n", - ioc->name, SCpnt); - scsi_print_command(SCpnt); - - vdevice = SCpnt->device->hostdata; - if (!vdevice || !vdevice->vtarget) { - retval = 0; - goto out; - } - - /* Target reset to hidden raid component is not supported - */ - if (vdevice->vtarget->tflags & MPT_TARGET_FLAGS_RAID_COMPONENT) { - retval = FAILED; - goto out; - } - - retval = mptscsih_IssueTaskMgmt(hd, - MPI_SCSITASKMGMT_TASKTYPE_TARGET_RESET, - vdevice->vtarget->channel, - vdevice->vtarget->id, 0, 0, - mptscsih_get_tm_timeout(ioc)); - - out: - printk (MYIOC_s_INFO_FMT "target reset: %s (sc=%p)\n", - ioc->name, ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt); - - if (retval == 0) - return SUCCESS; - else - return FAILED; -} - /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** @@ -2915,14 +2856,14 @@ mptscsih_do_cmd(MPT_SCSI_HOST *hd, INTERNAL_CMD *io) timeout = 10; break; - case RESERVE: + case RESERVE_6: cmdLen = 6; dir = MPI_SCSIIO_CONTROL_READ; CDB[0] = cmd; timeout = 10; break; - case RELEASE: + case RELEASE_6: cmdLen = 6; dir = MPI_SCSIIO_CONTROL_READ; CDB[0] = cmd; @@ -3306,7 +3247,6 @@ EXPORT_SYMBOL(mptscsih_sdev_destroy); EXPORT_SYMBOL(mptscsih_sdev_configure); EXPORT_SYMBOL(mptscsih_abort); EXPORT_SYMBOL(mptscsih_dev_reset); -EXPORT_SYMBOL(mptscsih_target_reset); EXPORT_SYMBOL(mptscsih_bus_reset); EXPORT_SYMBOL(mptscsih_host_reset); EXPORT_SYMBOL(mptscsih_bios_param); diff --git a/drivers/message/fusion/mptscsih.h b/drivers/message/fusion/mptscsih.h index ece451c575e1..8c2bb2331fc1 100644 --- a/drivers/message/fusion/mptscsih.h +++ b/drivers/message/fusion/mptscsih.h @@ -121,7 +121,6 @@ extern int mptscsih_sdev_configure(struct scsi_device *device, struct queue_limits *lim); extern int mptscsih_abort(struct scsi_cmnd * SCpnt); extern int mptscsih_dev_reset(struct scsi_cmnd * SCpnt); -extern int mptscsih_target_reset(struct scsi_cmnd * SCpnt); extern int mptscsih_bus_reset(struct scsi_cmnd * SCpnt); extern int mptscsih_host_reset(struct scsi_cmnd *SCpnt); extern int mptscsih_bios_param(struct scsi_device * sdev, struct block_device *bdev, sector_t capacity, int geom[]); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index fe0e3e2a8117..71e50fc65c14 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -1441,7 +1441,9 @@ void aq_nic_deinit(struct aq_nic_s *self, bool link_down) aq_ptp_ring_free(self); aq_ptp_free(self); - if (likely(self->aq_fw_ops->deinit) && link_down) { + /* May be invoked during hot unplug. */ + if (pci_device_is_present(self->pdev) && + likely(self->aq_fw_ops->deinit) && link_down) { mutex_lock(&self->fwreq_mutex); self->aq_fw_ops->deinit(self->aq_hw); mutex_unlock(&self->fwreq_mutex); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index 0715ea5bf13e..3b082114f2e5 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -41,9 +41,12 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct bcmgenet_priv *priv = netdev_priv(dev); struct device *kdev = &priv->pdev->dev; + u32 phy_wolopts = 0; - if (dev->phydev) + if (dev->phydev) { phy_ethtool_get_wol(dev->phydev, wol); + phy_wolopts = wol->wolopts; + } /* MAC is not wake-up capable, return what the PHY does */ if (!device_can_wakeup(kdev)) @@ -51,9 +54,14 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) /* Overlay MAC capabilities with that of the PHY queried before */ wol->supported |= WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER; - wol->wolopts = priv->wolopts; - memset(wol->sopass, 0, sizeof(wol->sopass)); + wol->wolopts |= priv->wolopts; + /* Return the PHY configured magic password */ + if (phy_wolopts & WAKE_MAGICSECURE) + return; + + /* Otherwise the MAC one */ + memset(wol->sopass, 0, sizeof(wol->sopass)); if (wol->wolopts & WAKE_MAGICSECURE) memcpy(wol->sopass, priv->sopass, sizeof(priv->sopass)); } @@ -70,7 +78,7 @@ int bcmgenet_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) /* Try Wake-on-LAN from the PHY first */ if (dev->phydev) { ret = phy_ethtool_set_wol(dev->phydev, wol); - if (ret != -EOPNOTSUPP) + if (ret != -EOPNOTSUPP && wol->wolopts) return ret; } diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 1c94bf1db718..d9d675f1ebfe 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -55,6 +55,7 @@ #include <linux/hwmon.h> #include <linux/hwmon-sysfs.h> #include <linux/crc32poly.h> +#include <linux/dmi.h> #include <net/checksum.h> #include <net/gso.h> @@ -18212,6 +18213,50 @@ static int tg3_resume(struct device *device) static SIMPLE_DEV_PM_OPS(tg3_pm_ops, tg3_suspend, tg3_resume); +/* Systems where ACPI _PTS (Prepare To Sleep) S5 will result in a fatal + * PCIe AER event on the tg3 device if the tg3 device is not, or cannot + * be, powered down. + */ +static const struct dmi_system_id tg3_restart_aer_quirk_table[] = { + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R440"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R540"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R640"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R650"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R740"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R750"), + }, + }, + {} +}; + static void tg3_shutdown(struct pci_dev *pdev) { struct net_device *dev = pci_get_drvdata(pdev); @@ -18228,6 +18273,19 @@ static void tg3_shutdown(struct pci_dev *pdev) if (system_state == SYSTEM_POWER_OFF) tg3_power_down(tp); + else if (system_state == SYSTEM_RESTART && + dmi_first_match(tg3_restart_aer_quirk_table) && + pdev->current_state != PCI_D3cold && + pdev->current_state != PCI_UNKNOWN) { + /* Disable PCIe AER on the tg3 to avoid a fatal + * error during this system restart. + */ + pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_CERE | + PCI_EXP_DEVCTL_NFERE | + PCI_EXP_DEVCTL_FERE | + PCI_EXP_DEVCTL_URRE); + } rtnl_unlock(); diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c index d116e2b10bce..dbdb83567364 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c @@ -981,6 +981,9 @@ static int ice_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv /* preallocate memory for ice_sched_node */ node = devm_kzalloc(ice_hw_to_dev(pi->hw), sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + *priv = node; return 0; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 5d2d7736fd5f..9c9ea4c1b93b 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -527,15 +527,14 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring) * @xdp: xdp_buff used as input to the XDP program * @xdp_prog: XDP program to run * @xdp_ring: ring to be used for XDP_TX action - * @rx_buf: Rx buffer to store the XDP action * @eop_desc: Last descriptor in packet to read metadata from * * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} */ -static void +static u32 ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring, - struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *eop_desc) + union ice_32b_rx_flex_desc *eop_desc) { unsigned int ret = ICE_XDP_PASS; u32 act; @@ -574,7 +573,7 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, ret = ICE_XDP_CONSUMED; } exit: - ice_set_rx_bufs_act(xdp, rx_ring, ret); + return ret; } /** @@ -860,10 +859,8 @@ ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, xdp_buff_set_frags_flag(xdp); } - if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) { - ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED); + if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) return -ENOMEM; - } __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page, rx_buf->page_offset, size); @@ -924,7 +921,6 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, struct ice_rx_buf *rx_buf; rx_buf = &rx_ring->rx_buf[ntc]; - rx_buf->pgcnt = page_count(rx_buf->page); prefetchw(rx_buf->page); if (!size) @@ -940,6 +936,31 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, return rx_buf; } +/** + * ice_get_pgcnts - grab page_count() for gathered fragments + * @rx_ring: Rx descriptor ring to store the page counts on + * + * This function is intended to be called right before running XDP + * program so that the page recycling mechanism will be able to take + * a correct decision regarding underlying pages; this is done in such + * way as XDP program can change the refcount of page + */ +static void ice_get_pgcnts(struct ice_rx_ring *rx_ring) +{ + u32 nr_frags = rx_ring->nr_frags + 1; + u32 idx = rx_ring->first_desc; + struct ice_rx_buf *rx_buf; + u32 cnt = rx_ring->count; + + for (int i = 0; i < nr_frags; i++) { + rx_buf = &rx_ring->rx_buf[idx]; + rx_buf->pgcnt = page_count(rx_buf->page); + + if (++idx == cnt) + idx = 0; + } +} + /** * ice_build_skb - Build skb around an existing buffer * @rx_ring: Rx descriptor ring to transact packets on @@ -1051,12 +1072,12 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) rx_buf->page_offset + headlen, size, xdp->frame_sz); } else { - /* buffer is unused, change the act that should be taken later - * on; data was copied onto skb's linear part so there's no + /* buffer is unused, restore biased page count in Rx buffer; + * data was copied onto skb's linear part so there's no * need for adjusting page offset and we can reuse this buffer * as-is */ - rx_buf->act = ICE_SKB_CONSUMED; + rx_buf->pagecnt_bias++; } if (unlikely(xdp_buff_has_frags(xdp))) { @@ -1103,6 +1124,65 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf) rx_buf->page = NULL; } +/** + * ice_put_rx_mbuf - ice_put_rx_buf() caller, for all frame frags + * @rx_ring: Rx ring with all the auxiliary data + * @xdp: XDP buffer carrying linear + frags part + * @xdp_xmit: XDP_TX/XDP_REDIRECT verdict storage + * @ntc: a current next_to_clean value to be stored at rx_ring + * @verdict: return code from XDP program execution + * + * Walk through gathered fragments and satisfy internal page + * recycle mechanism; we take here an action related to verdict + * returned by XDP program; + */ +static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, + u32 *xdp_xmit, u32 ntc, u32 verdict) +{ + u32 nr_frags = rx_ring->nr_frags + 1; + u32 idx = rx_ring->first_desc; + u32 cnt = rx_ring->count; + u32 post_xdp_frags = 1; + struct ice_rx_buf *buf; + int i; + + if (unlikely(xdp_buff_has_frags(xdp))) + post_xdp_frags += xdp_get_shared_info_from_buff(xdp)->nr_frags; + + for (i = 0; i < post_xdp_frags; i++) { + buf = &rx_ring->rx_buf[idx]; + + if (verdict & (ICE_XDP_TX | ICE_XDP_REDIR)) { + ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); + *xdp_xmit |= verdict; + } else if (verdict & ICE_XDP_CONSUMED) { + buf->pagecnt_bias++; + } else if (verdict == ICE_XDP_PASS) { + ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); + } + + ice_put_rx_buf(rx_ring, buf); + + if (++idx == cnt) + idx = 0; + } + /* handle buffers that represented frags released by XDP prog; + * for these we keep pagecnt_bias as-is; refcount from struct page + * has been decremented within XDP prog and we do not have to increase + * the biased refcnt + */ + for (; i < nr_frags; i++) { + buf = &rx_ring->rx_buf[idx]; + ice_put_rx_buf(rx_ring, buf); + if (++idx == cnt) + idx = 0; + } + + xdp->data = NULL; + rx_ring->first_desc = ntc; + rx_ring->nr_frags = 0; +} + /** * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf * @rx_ring: Rx descriptor ring to transact packets on @@ -1120,15 +1200,13 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) unsigned int total_rx_bytes = 0, total_rx_pkts = 0; unsigned int offset = rx_ring->rx_offset; struct xdp_buff *xdp = &rx_ring->xdp; - u32 cached_ntc = rx_ring->first_desc; struct ice_tx_ring *xdp_ring = NULL; struct bpf_prog *xdp_prog = NULL; u32 ntc = rx_ring->next_to_clean; + u32 cached_ntu, xdp_verdict; u32 cnt = rx_ring->count; u32 xdp_xmit = 0; - u32 cached_ntu; bool failure; - u32 first; xdp_prog = READ_ONCE(rx_ring->xdp_prog); if (xdp_prog) { @@ -1190,6 +1268,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) xdp_prepare_buff(xdp, hard_start, offset, size, !!offset); xdp_buff_clear_frags_flag(xdp); } else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) { + ice_put_rx_mbuf(rx_ring, xdp, NULL, ntc, ICE_XDP_CONSUMED); break; } if (++ntc == cnt) @@ -1199,15 +1278,15 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) if (ice_is_non_eop(rx_ring, rx_desc)) continue; - ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf, rx_desc); - if (rx_buf->act == ICE_XDP_PASS) + ice_get_pgcnts(rx_ring); + xdp_verdict = ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_desc); + if (xdp_verdict == ICE_XDP_PASS) goto construct_skb; total_rx_bytes += xdp_get_buff_len(xdp); total_rx_pkts++; - xdp->data = NULL; - rx_ring->first_desc = ntc; - rx_ring->nr_frags = 0; + ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict); + continue; construct_skb: if (likely(ice_ring_uses_build_skb(rx_ring))) @@ -1217,18 +1296,12 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) /* exit if we failed to retrieve a buffer */ if (!skb) { rx_ring->ring_stats->rx_stats.alloc_page_failed++; - rx_buf->act = ICE_XDP_CONSUMED; - if (unlikely(xdp_buff_has_frags(xdp))) - ice_set_rx_bufs_act(xdp, rx_ring, - ICE_XDP_CONSUMED); - xdp->data = NULL; - rx_ring->first_desc = ntc; - rx_ring->nr_frags = 0; - break; + xdp_verdict = ICE_XDP_CONSUMED; } - xdp->data = NULL; - rx_ring->first_desc = ntc; - rx_ring->nr_frags = 0; + ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict); + + if (!skb) + break; stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S); if (unlikely(ice_test_staterr(rx_desc->wb.status_error0, @@ -1257,23 +1330,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) total_rx_pkts++; } - first = rx_ring->first_desc; - while (cached_ntc != first) { - struct ice_rx_buf *buf = &rx_ring->rx_buf[cached_ntc]; - - if (buf->act & (ICE_XDP_TX | ICE_XDP_REDIR)) { - ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); - xdp_xmit |= buf->act; - } else if (buf->act & ICE_XDP_CONSUMED) { - buf->pagecnt_bias++; - } else if (buf->act == ICE_XDP_PASS) { - ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); - } - - ice_put_rx_buf(rx_ring, buf); - if (++cached_ntc >= cnt) - cached_ntc = 0; - } rx_ring->next_to_clean = ntc; /* return up to cleaned_count buffers to hardware */ failure = ice_alloc_rx_bufs(rx_ring, ICE_RX_DESC_UNUSED(rx_ring)); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index cb347c852ba9..806bce701df3 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -201,7 +201,6 @@ struct ice_rx_buf { struct page *page; unsigned int page_offset; unsigned int pgcnt; - unsigned int act; unsigned int pagecnt_bias; }; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h index 79f960c6680d..6cf32b404127 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -5,49 +5,6 @@ #define _ICE_TXRX_LIB_H_ #include "ice.h" -/** - * ice_set_rx_bufs_act - propagate Rx buffer action to frags - * @xdp: XDP buffer representing frame (linear and frags part) - * @rx_ring: Rx ring struct - * act: action to store onto Rx buffers related to XDP buffer parts - * - * Set action that should be taken before putting Rx buffer from first frag - * to the last. - */ -static inline void -ice_set_rx_bufs_act(struct xdp_buff *xdp, const struct ice_rx_ring *rx_ring, - const unsigned int act) -{ - u32 sinfo_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags; - u32 nr_frags = rx_ring->nr_frags + 1; - u32 idx = rx_ring->first_desc; - u32 cnt = rx_ring->count; - struct ice_rx_buf *buf; - - for (int i = 0; i < nr_frags; i++) { - buf = &rx_ring->rx_buf[idx]; - buf->act = act; - - if (++idx == cnt) - idx = 0; - } - - /* adjust pagecnt_bias on frags freed by XDP prog */ - if (sinfo_frags < rx_ring->nr_frags && act == ICE_XDP_CONSUMED) { - u32 delta = rx_ring->nr_frags - sinfo_frags; - - while (delta) { - if (idx == 0) - idx = cnt - 1; - else - idx--; - buf = &rx_ring->rx_buf[idx]; - buf->pagecnt_bias--; - delta--; - } - } -} - /** * ice_test_staterr - tests bits in Rx descriptor status and error fields * @status_err_n: Rx descriptor status_error0 or status_error1 bits diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d04543e5697b..b34ebb916b89 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2424,6 +2424,11 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) u32 chan = 0; u8 qmode = 0; + if (rxfifosz == 0) + rxfifosz = priv->dma_cap.rx_fifo_size; + if (txfifosz == 0) + txfifosz = priv->dma_cap.tx_fifo_size; + /* Split up the shared Tx/Rx FIFO memory on DW QoS Eth and DW XGMAC */ if (priv->plat->has_gmac4 || priv->plat->has_xgmac) { rxfifosz /= rx_channels_count; @@ -2892,6 +2897,11 @@ static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode, int rxfifosz = priv->plat->rx_fifo_size; int txfifosz = priv->plat->tx_fifo_size; + if (rxfifosz == 0) + rxfifosz = priv->dma_cap.rx_fifo_size; + if (txfifosz == 0) + txfifosz = priv->dma_cap.tx_fifo_size; + /* Adjust for real per queue fifo size */ rxfifosz /= rx_channels_count; txfifosz /= tx_channels_count; @@ -5868,6 +5878,9 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu) const int mtu = new_mtu; int ret; + if (txfifosz == 0) + txfifosz = priv->dma_cap.tx_fifo_size; + txfifosz /= priv->plat->tx_queues_to_use; if (stmmac_xdp_is_enabled(priv) && new_mtu > ETH_DATA_LEN) { @@ -7219,29 +7232,15 @@ static int stmmac_hw_init(struct stmmac_priv *priv) priv->plat->tx_queues_to_use = priv->dma_cap.number_tx_queues; } - if (!priv->plat->rx_fifo_size) { - if (priv->dma_cap.rx_fifo_size) { - priv->plat->rx_fifo_size = priv->dma_cap.rx_fifo_size; - } else { - dev_err(priv->device, "Can't specify Rx FIFO size\n"); - return -ENODEV; - } - } else if (priv->dma_cap.rx_fifo_size && - priv->plat->rx_fifo_size > priv->dma_cap.rx_fifo_size) { + if (priv->dma_cap.rx_fifo_size && + priv->plat->rx_fifo_size > priv->dma_cap.rx_fifo_size) { dev_warn(priv->device, "Rx FIFO size (%u) exceeds dma capability\n", priv->plat->rx_fifo_size); priv->plat->rx_fifo_size = priv->dma_cap.rx_fifo_size; } - if (!priv->plat->tx_fifo_size) { - if (priv->dma_cap.tx_fifo_size) { - priv->plat->tx_fifo_size = priv->dma_cap.tx_fifo_size; - } else { - dev_err(priv->device, "Can't specify Tx FIFO size\n"); - return -ENODEV; - } - } else if (priv->dma_cap.tx_fifo_size && - priv->plat->tx_fifo_size > priv->dma_cap.tx_fifo_size) { + if (priv->dma_cap.tx_fifo_size && + priv->plat->tx_fifo_size > priv->dma_cap.tx_fifo_size) { dev_warn(priv->device, "Tx FIFO size (%u) exceeds dma capability\n", priv->plat->tx_fifo_size); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 28624cca91f8..acf96f262488 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -574,18 +574,14 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, return ret; } -static inline bool tun_capable(struct tun_struct *tun) +static inline bool tun_not_capable(struct tun_struct *tun) { const struct cred *cred = current_cred(); struct net *net = dev_net(tun->dev); - if (ns_capable(net->user_ns, CAP_NET_ADMIN)) - return 1; - if (uid_valid(tun->owner) && uid_eq(cred->euid, tun->owner)) - return 1; - if (gid_valid(tun->group) && in_egroup_p(tun->group)) - return 1; - return 0; + return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) || + (gid_valid(tun->group) && !in_egroup_p(tun->group))) && + !ns_capable(net->user_ns, CAP_NET_ADMIN); } static void tun_set_real_num_queues(struct tun_struct *tun) @@ -2782,7 +2778,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) !!(tun->flags & IFF_MULTI_QUEUE)) return -EINVAL; - if (!tun_capable(tun)) + if (tun_not_capable(tun)) return -EPERM; err = security_tun_dev_open(tun->security); if (err < 0) diff --git a/drivers/net/vmxnet3/vmxnet3_xdp.c b/drivers/net/vmxnet3/vmxnet3_xdp.c index 1341374a4588..616ecc38d172 100644 --- a/drivers/net/vmxnet3/vmxnet3_xdp.c +++ b/drivers/net/vmxnet3/vmxnet3_xdp.c @@ -28,7 +28,7 @@ vmxnet3_xdp_get_tq(struct vmxnet3_adapter *adapter) if (likely(cpu < tq_number)) tq = &adapter->tx_queue[cpu]; else - tq = &adapter->tx_queue[reciprocal_scale(cpu, tq_number)]; + tq = &adapter->tx_queue[cpu % tq_number]; return tq; } @@ -124,6 +124,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter, u32 buf_size; u32 dw2; + spin_lock_irq(&tq->tx_lock); dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT; dw2 |= xdpf->len; ctx.sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill; @@ -134,6 +135,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter, if (vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) == 0) { tq->stats.tx_ring_full++; + spin_unlock_irq(&tq->tx_lock); return -ENOSPC; } @@ -142,8 +144,10 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter, tbi->dma_addr = dma_map_single(&adapter->pdev->dev, xdpf->data, buf_size, DMA_TO_DEVICE); - if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) + if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) { + spin_unlock_irq(&tq->tx_lock); return -EFAULT; + } tbi->map_type |= VMXNET3_MAP_SINGLE; } else { /* XDP buffer from page pool */ page = virt_to_page(xdpf->data); @@ -182,6 +186,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter, dma_wmb(); gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^ VMXNET3_TXD_GEN); + spin_unlock_irq(&tq->tx_lock); /* No need to handle the case when tx_num_deferred doesn't reach * threshold. Backend driver at hypervisor side will poll and reset @@ -225,6 +230,7 @@ vmxnet3_xdp_xmit(struct net_device *dev, { struct vmxnet3_adapter *adapter = netdev_priv(dev); struct vmxnet3_tx_queue *tq; + struct netdev_queue *nq; int i; if (unlikely(test_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state))) @@ -236,6 +242,9 @@ vmxnet3_xdp_xmit(struct net_device *dev, if (tq->stopped) return -ENETDOWN; + nq = netdev_get_tx_queue(adapter->netdev, tq->qid); + + __netif_tx_lock(nq, smp_processor_id()); for (i = 0; i < n; i++) { if (vmxnet3_xdp_xmit_frame(adapter, frames[i], tq, true)) { tq->stats.xdp_xmit_err++; @@ -243,6 +252,7 @@ vmxnet3_xdp_xmit(struct net_device *dev, } } tq->stats.xdp_xmit += i; + __netif_tx_unlock(nq); return i; } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 40046770f1bf..818d4e49aab5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1700,7 +1700,13 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count) status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, NULL, 0, &result); - if (status < 0) + + /* + * It's either a kernel error or the host observed a connection + * lost. In either case it's not possible communicate with the + * controller and thus enter the error code path. + */ + if (status < 0 || status == NVME_SC_HOST_PATH_ERROR) return status; /* diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 094be164ffdc..f4f1866fbd5b 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -781,11 +781,19 @@ nvme_fc_abort_lsops(struct nvme_fc_rport *rport) static void nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl) { + enum nvme_ctrl_state state; + unsigned long flags; + dev_info(ctrl->ctrl.device, "NVME-FC{%d}: controller connectivity lost. Awaiting " "Reconnect", ctrl->cnum); - switch (nvme_ctrl_state(&ctrl->ctrl)) { + spin_lock_irqsave(&ctrl->lock, flags); + set_bit(ASSOC_FAILED, &ctrl->flags); + state = nvme_ctrl_state(&ctrl->ctrl); + spin_unlock_irqrestore(&ctrl->lock, flags); + + switch (state) { case NVME_CTRL_NEW: case NVME_CTRL_LIVE: /* @@ -2079,7 +2087,8 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) nvme_fc_complete_rq(rq); check_error: - if (terminate_assoc && ctrl->ctrl.state != NVME_CTRL_RESETTING) + if (terminate_assoc && + nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_RESETTING) queue_work(nvme_reset_wq, &ctrl->ioerr_work); } @@ -2533,6 +2542,8 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) { + enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl); + /* * if an error (io timeout, etc) while (re)connecting, the remote * port requested terminating of the association (disconnect_ls) @@ -2540,9 +2551,8 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) * the controller. Abort any ios on the association and let the * create_association error path resolve things. */ - if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { + if (state == NVME_CTRL_CONNECTING) { __nvme_fc_abort_outstanding_ios(ctrl, true); - set_bit(ASSOC_FAILED, &ctrl->flags); dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: transport error during (re)connect\n", ctrl->cnum); @@ -2550,7 +2560,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) } /* Otherwise, only proceed if in LIVE state - e.g. on first error */ - if (ctrl->ctrl.state != NVME_CTRL_LIVE) + if (state != NVME_CTRL_LIVE) return; dev_warn(ctrl->ctrl.device, @@ -3167,12 +3177,18 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) else ret = nvme_fc_recreate_io_queues(ctrl); } - if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags)) - ret = -EIO; if (ret) goto out_term_aen_ops; - changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); + spin_lock_irqsave(&ctrl->lock, flags); + if (!test_bit(ASSOC_FAILED, &ctrl->flags)) + changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); + else + ret = -EIO; + spin_unlock_irqrestore(&ctrl->lock, flags); + + if (ret) + goto out_term_aen_ops; ctrl->ctrl.nr_reconnects = 0; @@ -3578,8 +3594,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); spin_unlock_irqrestore(&rport->lock, flags); - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING) || - !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { dev_err(ctrl->ctrl.device, "NVME-FC{%d}: failed to init ctrl state\n", ctrl->cnum); goto fail_ctrl; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 278bed4e35bb..9197a5b173fd 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2153,14 +2153,6 @@ static int nvme_alloc_host_mem_multi(struct nvme_dev *dev, u64 preferred, return 0; out_free_bufs: - while (--i >= 0) { - size_t size = le32_to_cpu(descs[i].size) * NVME_CTRL_PAGE_SIZE; - - dma_free_attrs(dev->dev, size, bufs[i], - le64_to_cpu(descs[i].addr), - DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN); - } - kfree(bufs); out_free_descs: dma_free_coherent(dev->dev, descs_size, descs, descs_dma); @@ -3147,7 +3139,9 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) * because of high power consumption (> 2 Watt) in s2idle * sleep. Only some boards with Intel CPU are affected. */ - if (dmi_match(DMI_BOARD_NAME, "GMxPXxx") || + if (dmi_match(DMI_BOARD_NAME, "DN50Z-140HC-YD") || + dmi_match(DMI_BOARD_NAME, "GMxPXxx") || + dmi_match(DMI_BOARD_NAME, "GXxMRXx") || dmi_match(DMI_BOARD_NAME, "PH4PG31") || dmi_match(DMI_BOARD_NAME, "PH4PRX1_PH6PRX1") || dmi_match(DMI_BOARD_NAME, "PH6PG01_PH6PG71")) diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index b68a9e5f1ea3..3a41b9ab0f13 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -792,7 +792,7 @@ static umode_t nvme_tls_attrs_are_visible(struct kobject *kobj, return a->mode; } -const struct attribute_group nvme_tls_attrs_group = { +static const struct attribute_group nvme_tls_attrs_group = { .attrs = nvme_tls_attrs, .is_visible = nvme_tls_attrs_are_visible, }; diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index e670dc185a96..acc138bbf8f2 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -1068,6 +1068,7 @@ static void nvme_execute_identify_ns_nvm(struct nvmet_req *req) goto out; } status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id)); + kfree(id); out: nvmet_req_complete(req, status); } diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index a7ff05b3be29..eb406c90c167 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -287,7 +287,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) args.subsysnqn = d->subsysnqn; args.hostnqn = d->hostnqn; args.hostid = &d->hostid; - args.kato = c->kato; + args.kato = le32_to_cpu(c->kato); ctrl = nvmet_alloc_ctrl(&args); if (!ctrl) diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index c1f574fe3280..83be0657e6df 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -272,7 +272,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) iter_flags = SG_MITER_FROM_SG; } - if (req->cmd->rw.control & NVME_RW_LR) + if (req->cmd->rw.control & cpu_to_le16(NVME_RW_LR)) opf |= REQ_FAILFAST_DEV; if (is_pci_p2pdma_page(sg_page(req->sg))) diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index b540216c0c9a..4be8d22d2d8d 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -589,7 +589,7 @@ struct nvmet_alloc_ctrl_args { const struct nvmet_fabrics_ops *ops; struct device *p2p_client; u32 kato; - u32 result; + __le32 result; u16 error_loc; u16 status; }; diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index e0bc90597dca..da3e7edcf49d 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -108,9 +108,6 @@ void pci_save_aspm_l1ss_state(struct pci_dev *pdev) pci_read_config_dword(pdev, pdev->l1ss + PCI_L1SS_CTL2, cap++); pci_read_config_dword(pdev, pdev->l1ss + PCI_L1SS_CTL1, cap++); - if (parent->state_saved) - return; - /* * Save parent's L1 substate configuration so we have it for * pci_restore_aspm_l1ss_state(pdev) to restore. diff --git a/drivers/pci/tph.c b/drivers/pci/tph.c index 1e604fbbda65..07de59ca2ebf 100644 --- a/drivers/pci/tph.c +++ b/drivers/pci/tph.c @@ -360,7 +360,7 @@ int pcie_tph_set_st_entry(struct pci_dev *pdev, unsigned int index, u16 tag) return err; } - set_ctrl_reg_req_en(pdev, pdev->tph_mode); + set_ctrl_reg_req_en(pdev, pdev->tph_req_type); pci_dbg(pdev, "set steering tag: %s table, index=%d, tag=%#04x\n", (loc == PCI_TPH_LOC_MSIX) ? "MSI-X" : "ST", index, tag); diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index dfb5d4b8c046..30bd366d7b58 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1121,7 +1121,7 @@ static int ideapad_dytc_profile_init(struct ideapad_private *priv) /* Create platform_profile structure and register */ priv->dytc->ppdev = devm_platform_profile_register(&priv->platform_device->dev, - "ideapad-laptop", &priv->dytc, + "ideapad-laptop", priv->dytc, &dytc_profile_ops); if (IS_ERR(priv->dytc->ppdev)) { err = PTR_ERR(priv->dytc->ppdev); diff --git a/drivers/platform/x86/intel/ifs/ifs.h b/drivers/platform/x86/intel/ifs/ifs.h index 5c3c0dfa1bf8..f369fb0d3d82 100644 --- a/drivers/platform/x86/intel/ifs/ifs.h +++ b/drivers/platform/x86/intel/ifs/ifs.h @@ -23,12 +23,14 @@ * IFS Image * --------- * - * Intel provides a firmware file containing the scan tests via - * github [#f1]_. Similar to microcode there is a separate file for each + * Intel provides firmware files containing the scan tests via the webpage [#f1]_. + * Look under "In-Field Scan Test Images Download" section towards the + * end of the page. Similar to microcode, there are separate files for each * family-model-stepping. IFS Images are not applicable for some test types. * Wherever applicable the sysfs directory would provide a "current_batch" file * (see below) for loading the image. * + * .. [#f1] https://intel.com/InFieldScan * * IFS Image Loading * ----------------- @@ -125,9 +127,6 @@ * 2) Hardware allows for some number of cores to be tested in parallel. * The driver does not make use of this, it only tests one core at a time. * - * .. [#f1] https://github.com/intel/TBD - * - * * Structural Based Functional Test at Field (SBAF): * ------------------------------------------------- * diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c index 10f04b944117..1ee0fb5f8250 100644 --- a/drivers/platform/x86/intel/pmc/core.c +++ b/drivers/platform/x86/intel/pmc/core.c @@ -626,8 +626,8 @@ static u32 convert_ltr_scale(u32 val) static int pmc_core_ltr_show(struct seq_file *s, void *unused) { struct pmc_dev *pmcdev = s->private; - u64 decoded_snoop_ltr, decoded_non_snoop_ltr; - u32 ltr_raw_data, scale, val; + u64 decoded_snoop_ltr, decoded_non_snoop_ltr, val; + u32 ltr_raw_data, scale; u16 snoop_ltr, nonsnoop_ltr; unsigned int i, index, ltr_index = 0; diff --git a/drivers/pmdomain/core.c b/drivers/pmdomain/core.c index 6c94137865c9..9b2f28b34bb5 100644 --- a/drivers/pmdomain/core.c +++ b/drivers/pmdomain/core.c @@ -697,6 +697,37 @@ bool dev_pm_genpd_get_hwmode(struct device *dev) } EXPORT_SYMBOL_GPL(dev_pm_genpd_get_hwmode); +/** + * dev_pm_genpd_rpm_always_on() - Control if the PM domain can be powered off. + * + * @dev: Device for which the PM domain may need to stay on for. + * @on: Value to set or unset for the condition. + * + * For some usecases a consumer driver requires its device to remain power-on + * from the PM domain perspective during runtime. This function allows the + * behaviour to be dynamically controlled for a device attached to a genpd. + * + * It is assumed that the users guarantee that the genpd wouldn't be detached + * while this routine is getting called. + * + * Return: Returns 0 on success and negative error values on failures. + */ +int dev_pm_genpd_rpm_always_on(struct device *dev, bool on) +{ + struct generic_pm_domain *genpd; + + genpd = dev_to_genpd_safe(dev); + if (!genpd) + return -ENODEV; + + genpd_lock(genpd); + dev_gpd_data(dev)->rpm_always_on = on; + genpd_unlock(genpd); + + return 0; +} +EXPORT_SYMBOL_GPL(dev_pm_genpd_rpm_always_on); + static int _genpd_power_on(struct generic_pm_domain *genpd, bool timed) { unsigned int state_idx = genpd->state_idx; @@ -868,6 +899,10 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on, if (!pm_runtime_suspended(pdd->dev) || irq_safe_dev_in_sleep_domain(pdd->dev, genpd)) not_suspended++; + + /* The device may need its PM domain to stay powered on. */ + if (to_gpd_data(pdd)->rpm_always_on) + return -EBUSY; } if (not_suspended > 1 || (not_suspended == 1 && !one_dev_on)) diff --git a/drivers/pmdomain/rockchip/Kconfig b/drivers/pmdomain/rockchip/Kconfig index b0d70f1a8439..7e4f9b628f0b 100644 --- a/drivers/pmdomain/rockchip/Kconfig +++ b/drivers/pmdomain/rockchip/Kconfig @@ -4,6 +4,7 @@ if ARCH_ROCKCHIP || COMPILE_TEST config ROCKCHIP_PM_DOMAINS bool "Rockchip generic power domain" depends on PM + depends on HAVE_ARM_SMCCC_DISCOVERY select PM_GENERIC_DOMAINS help Say y here to enable power domain support. diff --git a/drivers/pmdomain/rockchip/pm-domains.c b/drivers/pmdomain/rockchip/pm-domains.c index cb0f93800138..27a5c68ff8ba 100644 --- a/drivers/pmdomain/rockchip/pm-domains.c +++ b/drivers/pmdomain/rockchip/pm-domains.c @@ -5,6 +5,7 @@ * Copyright (c) 2015 ROCKCHIP, Co. Ltd. */ +#include <linux/arm-smccc.h> #include <linux/io.h> #include <linux/iopoll.h> #include <linux/err.h> @@ -20,6 +21,7 @@ #include <linux/regmap.h> #include <linux/mfd/syscon.h> #include <soc/rockchip/pm_domains.h> +#include <soc/rockchip/rockchip_sip.h> #include <dt-bindings/power/px30-power.h> #include <dt-bindings/power/rockchip,rv1126-power.h> #include <dt-bindings/power/rk3036-power.h> @@ -540,6 +542,7 @@ static void rockchip_do_pmu_set_power_domain(struct rockchip_pm_domain *pd, struct generic_pm_domain *genpd = &pd->genpd; u32 pd_pwr_offset = pd->info->pwr_offset; bool is_on, is_mem_on = false; + struct arm_smccc_res res; if (pd->info->pwr_mask == 0) return; @@ -567,6 +570,12 @@ static void rockchip_do_pmu_set_power_domain(struct rockchip_pm_domain *pd, genpd->name, is_on); return; } + + /* Inform firmware to keep this pd on or off */ + if (arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_NONE) + arm_smccc_smc(ROCKCHIP_SIP_SUSPEND_MODE, ROCKCHIP_SLEEP_PD_CONFIG, + pmu->info->pwr_offset + pd_pwr_offset, + pd->info->pwr_mask, on, 0, 0, 0, &res); } static int rockchip_pd_power(struct rockchip_pm_domain *pd, bool power_on) diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c index 52c32dcbf7d8..4112a0097338 100644 --- a/drivers/powercap/powercap_sys.c +++ b/drivers/powercap/powercap_sys.c @@ -627,8 +627,7 @@ struct powercap_control_type *powercap_register_control_type( dev_set_name(&control_type->dev, "%s", name); result = device_register(&control_type->dev); if (result) { - if (control_type->allocated) - kfree(control_type); + put_device(&control_type->dev); return ERR_PTR(result); } idr_init(&control_type->idr); diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 37c24ffea65c..5a3c670aec27 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -303,9 +303,9 @@ if SCSI_LOWLEVEL && SCSI config ISCSI_TCP tristate "iSCSI Initiator over TCP/IP" depends on SCSI && INET + select CRC32 select CRYPTO select CRYPTO_MD5 - select CRYPTO_CRC32C select SCSI_ISCSI_ATTRS help The iSCSI Driver provides a host with the ability to access storage @@ -336,7 +336,6 @@ source "drivers/scsi/cxgbi/Kconfig" source "drivers/scsi/bnx2i/Kconfig" source "drivers/scsi/bnx2fc/Kconfig" source "drivers/scsi/be2iscsi/Kconfig" -source "drivers/scsi/cxlflash/Kconfig" config SGIWD93_SCSI tristate "SGI WD93C93 SCSI Driver" diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index 1313ddf2fd1a..16de3e41f94c 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -96,7 +96,6 @@ obj-$(CONFIG_SCSI_SYM53C8XX_2) += sym53c8xx_2/ obj-$(CONFIG_SCSI_ZALON) += zalon7xx.o obj-$(CONFIG_SCSI_DC395x) += dc395x.o obj-$(CONFIG_SCSI_AM53C974) += esp_scsi.o am53c974.o -obj-$(CONFIG_CXLFLASH) += cxlflash/ obj-$(CONFIG_MEGARAID_LEGACY) += megaraid.o obj-$(CONFIG_MEGARAID_NEWGEN) += megaraid/ obj-$(CONFIG_MEGARAID_SAS) += megaraid/ diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index abf6a82b74af..0be719f38377 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -3221,8 +3221,8 @@ int aac_scsi_cmd(struct scsi_cmnd * scsicmd) break; } fallthrough; - case RESERVE: - case RELEASE: + case RESERVE_6: + case RELEASE_6: case REZERO_UNIT: case REASSIGN_BLOCKS: case SEEK_10: diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 91170a67cc91..4b12e6dd8f07 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -2029,7 +2029,7 @@ static void aac_pci_resume(struct pci_dev *pdev) dev_err(&pdev->dev, "aacraid: PCI error - resume\n"); } -static struct pci_error_handlers aac_pci_err_handler = { +static const struct pci_error_handlers aac_pci_err_handler = { .error_detected = aac_pci_error_detected, .mmio_enabled = aac_pci_mmio_enabled, .slot_reset = aac_pci_slot_reset, diff --git a/drivers/scsi/arm/acornscsi.c b/drivers/scsi/arm/acornscsi.c index e50a3dbf9de3..ef21b85cf014 100644 --- a/drivers/scsi/arm/acornscsi.c +++ b/drivers/scsi/arm/acornscsi.c @@ -591,7 +591,7 @@ datadir_t acornscsi_datadirection(int command) case CHANGE_DEFINITION: case COMPARE: case COPY: case COPY_VERIFY: case LOG_SELECT: case MODE_SELECT: case MODE_SELECT_10: case SEND_DIAGNOSTIC: case WRITE_BUFFER: - case FORMAT_UNIT: case REASSIGN_BLOCKS: case RESERVE: + case FORMAT_UNIT: case REASSIGN_BLOCKS: case RESERVE_6: case SEARCH_EQUAL: case SEARCH_HIGH: case SEARCH_LOW: case WRITE_6: case WRITE_10: case WRITE_VERIFY: case UPDATE_BLOCK: case WRITE_LONG: case WRITE_SAME: diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index 76a1e373386e..a8b399ed98fc 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -5776,7 +5776,7 @@ static void beiscsi_remove(struct pci_dev *pcidev) } -static struct pci_error_handlers beiscsi_eeh_handlers = { +static const struct pci_error_handlers beiscsi_eeh_handlers = { .error_detected = beiscsi_eeh_err_detected, .slot_reset = beiscsi_eeh_reset, .resume = beiscsi_eeh_resume, diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c index 6aa1d3a7e24b..f015c53de0d4 100644 --- a/drivers/scsi/bfa/bfad.c +++ b/drivers/scsi/bfa/bfad.c @@ -1642,7 +1642,7 @@ MODULE_DEVICE_TABLE(pci, bfad_id_table); /* * PCI error recovery handlers. */ -static struct pci_error_handlers bfad_err_handler = { +static const struct pci_error_handlers bfad_err_handler = { .error_detected = bfad_pci_error_detected, .slot_reset = bfad_pci_slot_reset, .mmio_enabled = bfad_pci_mmio_enabled, diff --git a/drivers/scsi/csiostor/csio_init.c b/drivers/scsi/csiostor/csio_init.c index 9a3f2ed050bd..79c8dafdd49e 100644 --- a/drivers/scsi/csiostor/csio_init.c +++ b/drivers/scsi/csiostor/csio_init.c @@ -1162,7 +1162,7 @@ csio_pci_resume(struct pci_dev *pdev) dev_err(&pdev->dev, "resume of device failed: %d\n", rv); } -static struct pci_error_handlers csio_err_handler = { +static const struct pci_error_handlers csio_err_handler = { .error_detected = csio_pci_error_detected, .slot_reset = csio_pci_slot_reset, .resume = csio_pci_resume, diff --git a/drivers/scsi/cxlflash/Kconfig b/drivers/scsi/cxlflash/Kconfig deleted file mode 100644 index c424d36e89a6..000000000000 --- a/drivers/scsi/cxlflash/Kconfig +++ /dev/null @@ -1,15 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# IBM CXL-attached Flash Accelerator SCSI Driver -# - -config CXLFLASH - tristate "Support for IBM CAPI Flash (DEPRECATED)" - depends on PCI && SCSI && (CXL || OCXL) && EEH - select IRQ_POLL - help - The cxlflash driver is deprecated and will be removed in a future - kernel release. - - Allows CAPI Accelerated IO to Flash - If unsure, say N. diff --git a/drivers/scsi/cxlflash/Makefile b/drivers/scsi/cxlflash/Makefile deleted file mode 100644 index fd2f0dd9daf9..000000000000 --- a/drivers/scsi/cxlflash/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_CXLFLASH) += cxlflash.o -cxlflash-y += main.o superpipe.o lunmgt.o vlun.o -cxlflash-$(CONFIG_CXL) += cxl_hw.o -cxlflash-$(CONFIG_OCXL) += ocxl_hw.o diff --git a/drivers/scsi/cxlflash/backend.h b/drivers/scsi/cxlflash/backend.h deleted file mode 100644 index 181e0445ed42..000000000000 --- a/drivers/scsi/cxlflash/backend.h +++ /dev/null @@ -1,48 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * CXL Flash Device Driver - * - * Written by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com>, IBM Corporation - * Uma Krishnan <ukrishn@linux.vnet.ibm.com>, IBM Corporation - * - * Copyright (C) 2018 IBM Corporation - */ - -#ifndef _CXLFLASH_BACKEND_H -#define _CXLFLASH_BACKEND_H - -extern const struct cxlflash_backend_ops cxlflash_cxl_ops; -extern const struct cxlflash_backend_ops cxlflash_ocxl_ops; - -struct cxlflash_backend_ops { - struct module *module; - void __iomem * (*psa_map)(void *ctx_cookie); - void (*psa_unmap)(void __iomem *addr); - int (*process_element)(void *ctx_cookie); - int (*map_afu_irq)(void *ctx_cookie, int num, irq_handler_t handler, - void *cookie, char *name); - void (*unmap_afu_irq)(void *ctx_cookie, int num, void *cookie); - u64 (*get_irq_objhndl)(void *ctx_cookie, int irq); - int (*start_context)(void *ctx_cookie); - int (*stop_context)(void *ctx_cookie); - int (*afu_reset)(void *ctx_cookie); - void (*set_master)(void *ctx_cookie); - void * (*get_context)(struct pci_dev *dev, void *afu_cookie); - void * (*dev_context_init)(struct pci_dev *dev, void *afu_cookie); - int (*release_context)(void *ctx_cookie); - void (*perst_reloads_same_image)(void *afu_cookie, bool image); - ssize_t (*read_adapter_vpd)(struct pci_dev *dev, void *buf, - size_t count); - int (*allocate_afu_irqs)(void *ctx_cookie, int num); - void (*free_afu_irqs)(void *ctx_cookie); - void * (*create_afu)(struct pci_dev *dev); - void (*destroy_afu)(void *afu_cookie); - struct file * (*get_fd)(void *ctx_cookie, struct file_operations *fops, - int *fd); - void * (*fops_get_context)(struct file *file); - int (*start_work)(void *ctx_cookie, u64 irqs); - int (*fd_mmap)(struct file *file, struct vm_area_struct *vm); - int (*fd_release)(struct inode *inode, struct file *file); -}; - -#endif /* _CXLFLASH_BACKEND_H */ diff --git a/drivers/scsi/cxlflash/common.h b/drivers/scsi/cxlflash/common.h deleted file mode 100644 index de6229e27b48..000000000000 --- a/drivers/scsi/cxlflash/common.h +++ /dev/null @@ -1,340 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * CXL Flash Device Driver - * - * Written by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>, IBM Corporation - * Matthew R. Ochs <mrochs@linux.vnet.ibm.com>, IBM Corporation - * - * Copyright (C) 2015 IBM Corporation - */ - -#ifndef _CXLFLASH_COMMON_H -#define _CXLFLASH_COMMON_H - -#include <linux/async.h> -#include <linux/cdev.h> -#include <linux/irq_poll.h> -#include <linux/list.h> -#include <linux/rwsem.h> -#include <linux/types.h> -#include <scsi/scsi.h> -#include <scsi/scsi_cmnd.h> -#include <scsi/scsi_device.h> - -#include "backend.h" - -extern const struct file_operations cxlflash_cxl_fops; - -#define MAX_CONTEXT CXLFLASH_MAX_CONTEXT /* num contexts per afu */ -#define MAX_FC_PORTS CXLFLASH_MAX_FC_PORTS /* max ports per AFU */ -#define LEGACY_FC_PORTS 2 /* legacy ports per AFU */ - -#define CHAN2PORTBANK(_x) ((_x) >> ilog2(CXLFLASH_NUM_FC_PORTS_PER_BANK)) -#define CHAN2BANKPORT(_x) ((_x) & (CXLFLASH_NUM_FC_PORTS_PER_BANK - 1)) - -#define CHAN2PORTMASK(_x) (1 << (_x)) /* channel to port mask */ -#define PORTMASK2CHAN(_x) (ilog2((_x))) /* port mask to channel */ -#define PORTNUM2CHAN(_x) ((_x) - 1) /* port number to channel */ - -#define CXLFLASH_BLOCK_SIZE 4096 /* 4K blocks */ -#define CXLFLASH_MAX_XFER_SIZE 16777216 /* 16MB transfer */ -#define CXLFLASH_MAX_SECTORS (CXLFLASH_MAX_XFER_SIZE/512) /* SCSI wants - * max_sectors - * in units of - * 512 byte - * sectors - */ - -#define MAX_RHT_PER_CONTEXT (PAGE_SIZE / sizeof(struct sisl_rht_entry)) - -/* AFU command retry limit */ -#define MC_RETRY_CNT 5 /* Sufficient for SCSI and certain AFU errors */ - -/* Command management definitions */ -#define CXLFLASH_MAX_CMDS 256 -#define CXLFLASH_MAX_CMDS_PER_LUN CXLFLASH_MAX_CMDS - -/* RRQ for master issued cmds */ -#define NUM_RRQ_ENTRY CXLFLASH_MAX_CMDS - -/* SQ for master issued cmds */ -#define NUM_SQ_ENTRY CXLFLASH_MAX_CMDS - -/* Hardware queue definitions */ -#define CXLFLASH_DEF_HWQS 1 -#define CXLFLASH_MAX_HWQS 8 -#define PRIMARY_HWQ 0 - - -static inline void check_sizes(void) -{ - BUILD_BUG_ON_NOT_POWER_OF_2(CXLFLASH_NUM_FC_PORTS_PER_BANK); - BUILD_BUG_ON_NOT_POWER_OF_2(CXLFLASH_MAX_CMDS); -} - -/* AFU defines a fixed size of 4K for command buffers (borrow 4K page define) */ -#define CMD_BUFSIZE SIZE_4K - -enum cxlflash_lr_state { - LINK_RESET_INVALID, - LINK_RESET_REQUIRED, - LINK_RESET_COMPLETE -}; - -enum cxlflash_init_state { - INIT_STATE_NONE, - INIT_STATE_PCI, - INIT_STATE_AFU, - INIT_STATE_SCSI, - INIT_STATE_CDEV -}; - -enum cxlflash_state { - STATE_PROBING, /* Initial state during probe */ - STATE_PROBED, /* Temporary state, probe completed but EEH occurred */ - STATE_NORMAL, /* Normal running state, everything good */ - STATE_RESET, /* Reset state, trying to reset/recover */ - STATE_FAILTERM /* Failed/terminating state, error out users/threads */ -}; - -enum cxlflash_hwq_mode { - HWQ_MODE_RR, /* Roundrobin (default) */ - HWQ_MODE_TAG, /* Distribute based on block MQ tag */ - HWQ_MODE_CPU, /* CPU affinity */ - MAX_HWQ_MODE -}; - -/* - * Each context has its own set of resource handles that is visible - * only from that context. - */ - -struct cxlflash_cfg { - struct afu *afu; - - const struct cxlflash_backend_ops *ops; - struct pci_dev *dev; - struct pci_device_id *dev_id; - struct Scsi_Host *host; - int num_fc_ports; - struct cdev cdev; - struct device *chardev; - - ulong cxlflash_regs_pci; - - struct work_struct work_q; - enum cxlflash_init_state init_state; - enum cxlflash_lr_state lr_state; - int lr_port; - atomic_t scan_host_needed; - - void *afu_cookie; - - atomic_t recovery_threads; - struct mutex ctx_recovery_mutex; - struct mutex ctx_tbl_list_mutex; - struct rw_semaphore ioctl_rwsem; - struct ctx_info *ctx_tbl[MAX_CONTEXT]; - struct list_head ctx_err_recovery; /* contexts w/ recovery pending */ - struct file_operations cxl_fops; - - /* Parameters that are LUN table related */ - int last_lun_index[MAX_FC_PORTS]; - int promote_lun_index; - struct list_head lluns; /* list of llun_info structs */ - - wait_queue_head_t tmf_waitq; - spinlock_t tmf_slock; - bool tmf_active; - bool ws_unmap; /* Write-same unmap supported */ - wait_queue_head_t reset_waitq; - enum cxlflash_state state; - async_cookie_t async_reset_cookie; -}; - -struct afu_cmd { - struct sisl_ioarcb rcb; /* IOARCB (cache line aligned) */ - struct sisl_ioasa sa; /* IOASA must follow IOARCB */ - struct afu *parent; - struct scsi_cmnd *scp; - struct completion cevent; - struct list_head queue; - u32 hwq_index; - - u8 cmd_tmf:1, - cmd_aborted:1; - - struct list_head list; /* Pending commands link */ - - /* As per the SISLITE spec the IOARCB EA has to be 16-byte aligned. - * However for performance reasons the IOARCB/IOASA should be - * cache line aligned. - */ -} __aligned(cache_line_size()); - -static inline struct afu_cmd *sc_to_afuc(struct scsi_cmnd *sc) -{ - return PTR_ALIGN(scsi_cmd_priv(sc), __alignof__(struct afu_cmd)); -} - -static inline struct afu_cmd *sc_to_afuci(struct scsi_cmnd *sc) -{ - struct afu_cmd *afuc = sc_to_afuc(sc); - - INIT_LIST_HEAD(&afuc->queue); - return afuc; -} - -static inline struct afu_cmd *sc_to_afucz(struct scsi_cmnd *sc) -{ - struct afu_cmd *afuc = sc_to_afuc(sc); - - memset(afuc, 0, sizeof(*afuc)); - return sc_to_afuci(sc); -} - -struct hwq { - /* Stuff requiring alignment go first. */ - struct sisl_ioarcb sq[NUM_SQ_ENTRY]; /* 16K SQ */ - u64 rrq_entry[NUM_RRQ_ENTRY]; /* 2K RRQ */ - - /* Beware of alignment till here. Preferably introduce new - * fields after this point - */ - struct afu *afu; - void *ctx_cookie; - struct sisl_host_map __iomem *host_map; /* MC host map */ - struct sisl_ctrl_map __iomem *ctrl_map; /* MC control map */ - ctx_hndl_t ctx_hndl; /* master's context handle */ - u32 index; /* Index of this hwq */ - int num_irqs; /* Number of interrupts requested for context */ - struct list_head pending_cmds; /* Commands pending completion */ - - atomic_t hsq_credits; - spinlock_t hsq_slock; /* Hardware send queue lock */ - struct sisl_ioarcb *hsq_start; - struct sisl_ioarcb *hsq_end; - struct sisl_ioarcb *hsq_curr; - spinlock_t hrrq_slock; - u64 *hrrq_start; - u64 *hrrq_end; - u64 *hrrq_curr; - bool toggle; - bool hrrq_online; - - s64 room; - - struct irq_poll irqpoll; -} __aligned(cache_line_size()); - -struct afu { - struct hwq hwqs[CXLFLASH_MAX_HWQS]; - int (*send_cmd)(struct afu *afu, struct afu_cmd *cmd); - int (*context_reset)(struct hwq *hwq); - - /* AFU HW */ - struct cxlflash_afu_map __iomem *afu_map; /* entire MMIO map */ - - atomic_t cmds_active; /* Number of currently active AFU commands */ - struct mutex sync_active; /* Mutex to serialize AFU commands */ - u64 hb; - u32 internal_lun; /* User-desired LUN mode for this AFU */ - - u32 num_hwqs; /* Number of hardware queues */ - u32 desired_hwqs; /* Desired h/w queues, effective on AFU reset */ - enum cxlflash_hwq_mode hwq_mode; /* Steering mode for h/w queues */ - u32 hwq_rr_count; /* Count to distribute traffic for roundrobin */ - - char version[16]; - u64 interface_version; - - u32 irqpoll_weight; - struct cxlflash_cfg *parent; /* Pointer back to parent cxlflash_cfg */ -}; - -static inline struct hwq *get_hwq(struct afu *afu, u32 index) -{ - WARN_ON(index >= CXLFLASH_MAX_HWQS); - - return &afu->hwqs[index]; -} - -static inline bool afu_is_irqpoll_enabled(struct afu *afu) -{ - return !!afu->irqpoll_weight; -} - -static inline bool afu_has_cap(struct afu *afu, u64 cap) -{ - u64 afu_cap = afu->interface_version >> SISL_INTVER_CAP_SHIFT; - - return afu_cap & cap; -} - -static inline bool afu_is_ocxl_lisn(struct afu *afu) -{ - return afu_has_cap(afu, SISL_INTVER_CAP_OCXL_LISN); -} - -static inline bool afu_is_afu_debug(struct afu *afu) -{ - return afu_has_cap(afu, SISL_INTVER_CAP_AFU_DEBUG); -} - -static inline bool afu_is_lun_provision(struct afu *afu) -{ - return afu_has_cap(afu, SISL_INTVER_CAP_LUN_PROVISION); -} - -static inline bool afu_is_sq_cmd_mode(struct afu *afu) -{ - return afu_has_cap(afu, SISL_INTVER_CAP_SQ_CMD_MODE); -} - -static inline bool afu_is_ioarrin_cmd_mode(struct afu *afu) -{ - return afu_has_cap(afu, SISL_INTVER_CAP_IOARRIN_CMD_MODE); -} - -static inline u64 lun_to_lunid(u64 lun) -{ - __be64 lun_id; - - int_to_scsilun(lun, (struct scsi_lun *)&lun_id); - return be64_to_cpu(lun_id); -} - -static inline struct fc_port_bank __iomem *get_fc_port_bank( - struct cxlflash_cfg *cfg, int i) -{ - struct afu *afu = cfg->afu; - - return &afu->afu_map->global.bank[CHAN2PORTBANK(i)]; -} - -static inline __be64 __iomem *get_fc_port_regs(struct cxlflash_cfg *cfg, int i) -{ - struct fc_port_bank __iomem *fcpb = get_fc_port_bank(cfg, i); - - return &fcpb->fc_port_regs[CHAN2BANKPORT(i)][0]; -} - -static inline __be64 __iomem *get_fc_port_luns(struct cxlflash_cfg *cfg, int i) -{ - struct fc_port_bank __iomem *fcpb = get_fc_port_bank(cfg, i); - - return &fcpb->fc_port_luns[CHAN2BANKPORT(i)][0]; -} - -int cxlflash_afu_sync(struct afu *afu, ctx_hndl_t c, res_hndl_t r, u8 mode); -void cxlflash_list_init(void); -void cxlflash_term_global_luns(void); -void cxlflash_free_errpage(void); -int cxlflash_ioctl(struct scsi_device *sdev, unsigned int cmd, - void __user *arg); -void cxlflash_stop_term_user_contexts(struct cxlflash_cfg *cfg); -int cxlflash_mark_contexts_error(struct cxlflash_cfg *cfg); -void cxlflash_term_local_luns(struct cxlflash_cfg *cfg); -void cxlflash_restore_luntable(struct cxlflash_cfg *cfg); - -#endif /* ifndef _CXLFLASH_COMMON_H */ diff --git a/drivers/scsi/cxlflash/cxl_hw.c b/drivers/scsi/cxlflash/cxl_hw.c deleted file mode 100644 index b814130f3f5c..000000000000 --- a/drivers/scsi/cxlflash/cxl_hw.c +++ /dev/null @@ -1,177 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * CXL Flash Device Driver - * - * Written by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com>, IBM Corporation - * Uma Krishnan <ukrishn@linux.vnet.ibm.com>, IBM Corporation - * - * Copyright (C) 2018 IBM Corporation - */ - -#include <misc/cxl.h> - -#include "backend.h" - -/* - * The following routines map the cxlflash backend operations to existing CXL - * kernel API function and are largely simple shims that provide an abstraction - * for converting generic context and AFU cookies into cxl_context or cxl_afu - * pointers. - */ - -static void __iomem *cxlflash_psa_map(void *ctx_cookie) -{ - return cxl_psa_map(ctx_cookie); -} - -static void cxlflash_psa_unmap(void __iomem *addr) -{ - cxl_psa_unmap(addr); -} - -static int cxlflash_process_element(void *ctx_cookie) -{ - return cxl_process_element(ctx_cookie); -} - -static int cxlflash_map_afu_irq(void *ctx_cookie, int num, - irq_handler_t handler, void *cookie, char *name) -{ - return cxl_map_afu_irq(ctx_cookie, num, handler, cookie, name); -} - -static void cxlflash_unmap_afu_irq(void *ctx_cookie, int num, void *cookie) -{ - cxl_unmap_afu_irq(ctx_cookie, num, cookie); -} - -static u64 cxlflash_get_irq_objhndl(void *ctx_cookie, int irq) -{ - /* Dummy fop for cxl */ - return 0; -} - -static int cxlflash_start_context(void *ctx_cookie) -{ - return cxl_start_context(ctx_cookie, 0, NULL); -} - -static int cxlflash_stop_context(void *ctx_cookie) -{ - return cxl_stop_context(ctx_cookie); -} - -static int cxlflash_afu_reset(void *ctx_cookie) -{ - return cxl_afu_reset(ctx_cookie); -} - -static void cxlflash_set_master(void *ctx_cookie) -{ - cxl_set_master(ctx_cookie); -} - -static void *cxlflash_get_context(struct pci_dev *dev, void *afu_cookie) -{ - return cxl_get_context(dev); -} - -static void *cxlflash_dev_context_init(struct pci_dev *dev, void *afu_cookie) -{ - return cxl_dev_context_init(dev); -} - -static int cxlflash_release_context(void *ctx_cookie) -{ - return cxl_release_context(ctx_cookie); -} - -static void cxlflash_perst_reloads_same_image(void *afu_cookie, bool image) -{ - cxl_perst_reloads_same_image(afu_cookie, image); -} - -static ssize_t cxlflash_read_adapter_vpd(struct pci_dev *dev, - void *buf, size_t count) -{ - return cxl_read_adapter_vpd(dev, buf, count); -} - -static int cxlflash_allocate_afu_irqs(void *ctx_cookie, int num) -{ - return cxl_allocate_afu_irqs(ctx_cookie, num); -} - -static void cxlflash_free_afu_irqs(void *ctx_cookie) -{ - cxl_free_afu_irqs(ctx_cookie); -} - -static void *cxlflash_create_afu(struct pci_dev *dev) -{ - return cxl_pci_to_afu(dev); -} - -static void cxlflash_destroy_afu(void *afu) -{ - /* Dummy fop for cxl */ -} - -static struct file *cxlflash_get_fd(void *ctx_cookie, - struct file_operations *fops, int *fd) -{ - return cxl_get_fd(ctx_cookie, fops, fd); -} - -static void *cxlflash_fops_get_context(struct file *file) -{ - return cxl_fops_get_context(file); -} - -static int cxlflash_start_work(void *ctx_cookie, u64 irqs) -{ - struct cxl_ioctl_start_work work = { 0 }; - - work.num_interrupts = irqs; - work.flags = CXL_START_WORK_NUM_IRQS; - - return cxl_start_work(ctx_cookie, &work); -} - -static int cxlflash_fd_mmap(struct file *file, struct vm_area_struct *vm) -{ - return cxl_fd_mmap(file, vm); -} - -static int cxlflash_fd_release(struct inode *inode, struct file *file) -{ - return cxl_fd_release(inode, file); -} - -const struct cxlflash_backend_ops cxlflash_cxl_ops = { - .module = THIS_MODULE, - .psa_map = cxlflash_psa_map, - .psa_unmap = cxlflash_psa_unmap, - .process_element = cxlflash_process_element, - .map_afu_irq = cxlflash_map_afu_irq, - .unmap_afu_irq = cxlflash_unmap_afu_irq, - .get_irq_objhndl = cxlflash_get_irq_objhndl, - .start_context = cxlflash_start_context, - .stop_context = cxlflash_stop_context, - .afu_reset = cxlflash_afu_reset, - .set_master = cxlflash_set_master, - .get_context = cxlflash_get_context, - .dev_context_init = cxlflash_dev_context_init, - .release_context = cxlflash_release_context, - .perst_reloads_same_image = cxlflash_perst_reloads_same_image, - .read_adapter_vpd = cxlflash_read_adapter_vpd, - .allocate_afu_irqs = cxlflash_allocate_afu_irqs, - .free_afu_irqs = cxlflash_free_afu_irqs, - .create_afu = cxlflash_create_afu, - .destroy_afu = cxlflash_destroy_afu, - .get_fd = cxlflash_get_fd, - .fops_get_context = cxlflash_fops_get_context, - .start_work = cxlflash_start_work, - .fd_mmap = cxlflash_fd_mmap, - .fd_release = cxlflash_fd_release, -}; diff --git a/drivers/scsi/cxlflash/lunmgt.c b/drivers/scsi/cxlflash/lunmgt.c deleted file mode 100644 index 962c797fda07..000000000000 --- a/drivers/scsi/cxlflash/lunmgt.c +++ /dev/null @@ -1,278 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * CXL Flash Device Driver - * - * Written by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>, IBM Corporation - * Matthew R. Ochs <mrochs@linux.vnet.ibm.com>, IBM Corporation - * - * Copyright (C) 2015 IBM Corporation - */ - -#include <linux/unaligned.h> - -#include <linux/interrupt.h> -#include <linux/pci.h> - -#include <scsi/scsi_host.h> -#include <uapi/scsi/cxlflash_ioctl.h> - -#include "sislite.h" -#include "common.h" -#include "vlun.h" -#include "superpipe.h" - -/** - * create_local() - allocate and initialize a local LUN information structure - * @sdev: SCSI device associated with LUN. - * @wwid: World Wide Node Name for LUN. - * - * Return: Allocated local llun_info structure on success, NULL on failure - */ -static struct llun_info *create_local(struct scsi_device *sdev, u8 *wwid) -{ - struct cxlflash_cfg *cfg = shost_priv(sdev->host); - struct device *dev = &cfg->dev->dev; - struct llun_info *lli = NULL; - - lli = kzalloc(sizeof(*lli), GFP_KERNEL); - if (unlikely(!lli)) { - dev_err(dev, "%s: could not allocate lli\n", __func__); - goto out; - } - - lli->sdev = sdev; - lli->host_no = sdev->host->host_no; - lli->in_table = false; - - memcpy(lli->wwid, wwid, DK_CXLFLASH_MANAGE_LUN_WWID_LEN); -out: - return lli; -} - -/** - * create_global() - allocate and initialize a global LUN information structure - * @sdev: SCSI device associated with LUN. - * @wwid: World Wide Node Name for LUN. - * - * Return: Allocated global glun_info structure on success, NULL on failure - */ -static struct glun_info *create_global(struct scsi_device *sdev, u8 *wwid) -{ - struct cxlflash_cfg *cfg = shost_priv(sdev->host); - struct device *dev = &cfg->dev->dev; - struct glun_info *gli = NULL; - - gli = kzalloc(sizeof(*gli), GFP_KERNEL); - if (unlikely(!gli)) { - dev_err(dev, "%s: could not allocate gli\n", __func__); - goto out; - } - - mutex_init(&gli->mutex); - memcpy(gli->wwid, wwid, DK_CXLFLASH_MANAGE_LUN_WWID_LEN); -out: - return gli; -} - -/** - * lookup_local() - find a local LUN information structure by WWID - * @cfg: Internal structure associated with the host. - * @wwid: WWID associated with LUN. - * - * Return: Found local lun_info structure on success, NULL on failure - */ -static struct llun_info *lookup_local(struct cxlflash_cfg *cfg, u8 *wwid) -{ - struct llun_info *lli, *temp; - - list_for_each_entry_safe(lli, temp, &cfg->lluns, list) - if (!memcmp(lli->wwid, wwid, DK_CXLFLASH_MANAGE_LUN_WWID_LEN)) - return lli; - - return NULL; -} - -/** - * lookup_global() - find a global LUN information structure by WWID - * @wwid: WWID associated with LUN. - * - * Return: Found global lun_info structure on success, NULL on failure - */ -static struct glun_info *lookup_global(u8 *wwid) -{ - struct glun_info *gli, *temp; - - list_for_each_entry_safe(gli, temp, &global.gluns, list) - if (!memcmp(gli->wwid, wwid, DK_CXLFLASH_MANAGE_LUN_WWID_LEN)) - return gli; - - return NULL; -} - -/** - * find_and_create_lun() - find or create a local LUN information structure - * @sdev: SCSI device associated with LUN. - * @wwid: WWID associated with LUN. - * - * The LUN is kept both in a local list (per adapter) and in a global list - * (across all adapters). Certain attributes of the LUN are local to the - * adapter (such as index, port selection mask, etc.). - * - * The block allocation map is shared across all adapters (i.e. associated - * wih the global list). Since different attributes are associated with - * the per adapter and global entries, allocate two separate structures for each - * LUN (one local, one global). - * - * Keep a pointer back from the local to the global entry. - * - * This routine assumes the caller holds the global mutex. - * - * Return: Found/Allocated local lun_info structure on success, NULL on failure - */ -static struct llun_info *find_and_create_lun(struct scsi_device *sdev, u8 *wwid) -{ - struct cxlflash_cfg *cfg = shost_priv(sdev->host); - struct device *dev = &cfg->dev->dev; - struct llun_info *lli = NULL; - struct glun_info *gli = NULL; - - if (unlikely(!wwid)) - goto out; - - lli = lookup_local(cfg, wwid); - if (lli) - goto out; - - lli = create_local(sdev, wwid); - if (unlikely(!lli)) - goto out; - - gli = lookup_global(wwid); - if (gli) { - lli->parent = gli; - list_add(&lli->list, &cfg->lluns); - goto out; - } - - gli = create_global(sdev, wwid); - if (unlikely(!gli)) { - kfree(lli); - lli = NULL; - goto out; - } - - lli->parent = gli; - list_add(&lli->list, &cfg->lluns); - - list_add(&gli->list, &global.gluns); - -out: - dev_dbg(dev, "%s: returning lli=%p, gli=%p\n", __func__, lli, gli); - return lli; -} - -/** - * cxlflash_term_local_luns() - Delete all entries from local LUN list, free. - * @cfg: Internal structure associated with the host. - */ -void cxlflash_term_local_luns(struct cxlflash_cfg *cfg) -{ - struct llun_info *lli, *temp; - - mutex_lock(&global.mutex); - list_for_each_entry_safe(lli, temp, &cfg->lluns, list) { - list_del(&lli->list); - kfree(lli); - } - mutex_unlock(&global.mutex); -} - -/** - * cxlflash_list_init() - initializes the global LUN list - */ -void cxlflash_list_init(void) -{ - INIT_LIST_HEAD(&global.gluns); - mutex_init(&global.mutex); - global.err_page = NULL; -} - -/** - * cxlflash_term_global_luns() - frees resources associated with global LUN list - */ -void cxlflash_term_global_luns(void) -{ - struct glun_info *gli, *temp; - - mutex_lock(&global.mutex); - list_for_each_entry_safe(gli, temp, &global.gluns, list) { - list_del(&gli->list); - cxlflash_ba_terminate(&gli->blka.ba_lun); - kfree(gli); - } - mutex_unlock(&global.mutex); -} - -/** - * cxlflash_manage_lun() - handles LUN management activities - * @sdev: SCSI device associated with LUN. - * @arg: Manage ioctl data structure. - * - * This routine is used to notify the driver about a LUN's WWID and associate - * SCSI devices (sdev) with a global LUN instance. Additionally it serves to - * change a LUN's operating mode: legacy or superpipe. - * - * Return: 0 on success, -errno on failure - */ -int cxlflash_manage_lun(struct scsi_device *sdev, void *arg) -{ - struct dk_cxlflash_manage_lun *manage = arg; - struct cxlflash_cfg *cfg = shost_priv(sdev->host); - struct device *dev = &cfg->dev->dev; - struct llun_info *lli = NULL; - int rc = 0; - u64 flags = manage->hdr.flags; - u32 chan = sdev->channel; - - mutex_lock(&global.mutex); - lli = find_and_create_lun(sdev, manage->wwid); - dev_dbg(dev, "%s: WWID=%016llx%016llx, flags=%016llx lli=%p\n", - __func__, get_unaligned_be64(&manage->wwid[0]), - get_unaligned_be64(&manage->wwid[8]), manage->hdr.flags, lli); - if (unlikely(!lli)) { - rc = -ENOMEM; - goto out; - } - - if (flags & DK_CXLFLASH_MANAGE_LUN_ENABLE_SUPERPIPE) { - /* - * Update port selection mask based upon channel, store off LUN - * in unpacked, AFU-friendly format, and hang LUN reference in - * the sdev. - */ - lli->port_sel |= CHAN2PORTMASK(chan); - lli->lun_id[chan] = lun_to_lunid(sdev->lun); - sdev->hostdata = lli; - } else if (flags & DK_CXLFLASH_MANAGE_LUN_DISABLE_SUPERPIPE) { - if (lli->parent->mode != MODE_NONE) - rc = -EBUSY; - else { - /* - * Clean up local LUN for this port and reset table - * tracking when no more references exist. - */ - sdev->hostdata = NULL; - lli->port_sel &= ~CHAN2PORTMASK(chan); - if (lli->port_sel == 0U) - lli->in_table = false; - } - } - - dev_dbg(dev, "%s: port_sel=%08x chan=%u lun_id=%016llx\n", - __func__, lli->port_sel, chan, lli->lun_id[chan]); - -out: - mutex_unlock(&global.mutex); - dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc); - return rc; -} diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c deleted file mode 100644 index ae626e389c8b..000000000000 --- a/drivers/scsi/cxlflash/main.c +++ /dev/null @@ -1,3970 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * CXL Flash Device Driver - * - * Written by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>, IBM Corporation - * Matthew R. Ochs <mrochs@linux.vnet.ibm.com>, IBM Corporation - * - * Copyright (C) 2015 IBM Corporation - */ - -#include <linux/delay.h> -#include <linux/list.h> -#include <linux/module.h> -#include <linux/pci.h> - -#include <linux/unaligned.h> - -#include <scsi/scsi_cmnd.h> -#include <scsi/scsi_host.h> -#include <uapi/scsi/cxlflash_ioctl.h> - -#include "main.h" -#include "sislite.h" -#include "common.h" - -MODULE_DESCRIPTION(CXLFLASH_ADAPTER_NAME); -MODULE_AUTHOR("Manoj N. Kumar <manoj@linux.vnet.ibm.com>"); -MODULE_AUTHOR("Matthew R. Ochs <mrochs@linux.vnet.ibm.com>"); -MODULE_LICENSE("GPL"); - -static char *cxlflash_devnode(const struct device *dev, umode_t *mode); -static const struct class cxlflash_class = { - .name = "cxlflash", - .devnode = cxlflash_devnode, -}; - -static u32 cxlflash_major; -static DECLARE_BITMAP(cxlflash_minor, CXLFLASH_MAX_ADAPTERS); - -/** - * process_cmd_err() - command error handler - * @cmd: AFU command that experienced the error. - * @scp: SCSI command associated with the AFU command in error. - * - * Translates error bits from AFU command to SCSI command results. - */ -static void process_cmd_err(struct afu_cmd *cmd, struct scsi_cmnd *scp) -{ - struct afu *afu = cmd->parent; - struct cxlflash_cfg *cfg = afu->parent; - struct device *dev = &cfg->dev->dev; - struct sisl_ioasa *ioasa; - u32 resid; - - ioasa = &(cmd->sa); - - if (ioasa->rc.flags & SISL_RC_FLAGS_UNDERRUN) { - resid = ioasa->resid; - scsi_set_resid(scp, resid); - dev_dbg(dev, "%s: cmd underrun cmd = %p scp = %p, resid = %d\n", - __func__, cmd, scp, resid); - } - - if (ioasa->rc.flags & SISL_RC_FLAGS_OVERRUN) { - dev_dbg(dev, "%s: cmd underrun cmd = %p scp = %p\n", - __func__, cmd, scp); - scp->result = (DID_ERROR << 16); - } - - dev_dbg(dev, "%s: cmd failed afu_rc=%02x scsi_rc=%02x fc_rc=%02x " - "afu_extra=%02x scsi_extra=%02x fc_extra=%02x\n", __func__, - ioasa->rc.afu_rc, ioasa->rc.scsi_rc, ioasa->rc.fc_rc, - ioasa->afu_extra, ioasa->scsi_extra, ioasa->fc_extra); - - if (ioasa->rc.scsi_rc) { - /* We have a SCSI status */ - if (ioasa->rc.flags & SISL_RC_FLAGS_SENSE_VALID) { - memcpy(scp->sense_buffer, ioasa->sense_data, - SISL_SENSE_DATA_LEN); - scp->result = ioasa->rc.scsi_rc; - } else - scp->result = ioasa->rc.scsi_rc | (DID_ERROR << 16); - } - - /* - * We encountered an error. Set scp->result based on nature - * of error. - */ - if (ioasa->rc.fc_rc) { - /* We have an FC status */ - switch (ioasa->rc.fc_rc) { - case SISL_FC_RC_LINKDOWN: - scp->result = (DID_REQUEUE << 16); - break; - case SISL_FC_RC_RESID: - /* This indicates an FCP resid underrun */ - if (!(ioasa->rc.flags & SISL_RC_FLAGS_OVERRUN)) { - /* If the SISL_RC_FLAGS_OVERRUN flag was set, - * then we will handle this error else where. - * If not then we must handle it here. - * This is probably an AFU bug. - */ - scp->result = (DID_ERROR << 16); - } - break; - case SISL_FC_RC_RESIDERR: - /* Resid mismatch between adapter and device */ - case SISL_FC_RC_TGTABORT: - case SISL_FC_RC_ABORTOK: - case SISL_FC_RC_ABORTFAIL: - case SISL_FC_RC_NOLOGI: - case SISL_FC_RC_ABORTPEND: - case SISL_FC_RC_WRABORTPEND: - case SISL_FC_RC_NOEXP: - case SISL_FC_RC_INUSE: - scp->result = (DID_ERROR << 16); - break; - } - } - - if (ioasa->rc.afu_rc) { - /* We have an AFU error */ - switch (ioasa->rc.afu_rc) { - case SISL_AFU_RC_NO_CHANNELS: - scp->result = (DID_NO_CONNECT << 16); - break; - case SISL_AFU_RC_DATA_DMA_ERR: - switch (ioasa->afu_extra) { - case SISL_AFU_DMA_ERR_PAGE_IN: - /* Retry */ - scp->result = (DID_IMM_RETRY << 16); - break; - case SISL_AFU_DMA_ERR_INVALID_EA: - default: - scp->result = (DID_ERROR << 16); - } - break; - case SISL_AFU_RC_OUT_OF_DATA_BUFS: - /* Retry */ - scp->result = (DID_ERROR << 16); - break; - default: - scp->result = (DID_ERROR << 16); - } - } -} - -/** - * cmd_complete() - command completion handler - * @cmd: AFU command that has completed. - * - * For SCSI commands this routine prepares and submits commands that have - * either completed or timed out to the SCSI stack. For internal commands - * (TMF or AFU), this routine simply notifies the originator that the - * command has completed. - */ -static void cmd_complete(struct afu_cmd *cmd) -{ - struct scsi_cmnd *scp; - ulong lock_flags; - struct afu *afu = cmd->parent; - struct cxlflash_cfg *cfg = afu->parent; - struct device *dev = &cfg->dev->dev; - struct hwq *hwq = get_hwq(afu, cmd->hwq_index); - - spin_lock_irqsave(&hwq->hsq_slock, lock_flags); - list_del(&cmd->list); - spin_unlock_irqrestore(&hwq->hsq_slock, lock_flags); - - if (cmd->scp) { - scp = cmd->scp; - if (unlikely(cmd->sa.ioasc)) - process_cmd_err(cmd, scp); - else - scp->result = (DID_OK << 16); - - dev_dbg_ratelimited(dev, "%s:scp=%p result=%08x ioasc=%08x\n", - __func__, scp, scp->result, cmd->sa.ioasc); - scsi_done(scp); - } else if (cmd->cmd_tmf) { - spin_lock_irqsave(&cfg->tmf_slock, lock_flags); - cfg->tmf_active = false; - wake_up_all_locked(&cfg->tmf_waitq); - spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags); - } else - complete(&cmd->cevent); -} - -/** - * flush_pending_cmds() - flush all pending commands on this hardware queue - * @hwq: Hardware queue to flush. - * - * The hardware send queue lock associated with this hardware queue must be - * held when calling this routine. - */ -static void flush_pending_cmds(struct hwq *hwq) -{ - struct cxlflash_cfg *cfg = hwq->afu->parent; - struct afu_cmd *cmd, *tmp; - struct scsi_cmnd *scp; - ulong lock_flags; - - list_for_each_entry_safe(cmd, tmp, &hwq->pending_cmds, list) { - /* Bypass command when on a doneq, cmd_complete() will handle */ - if (!list_empty(&cmd->queue)) - continue; - - list_del(&cmd->list); - - if (cmd->scp) { - scp = cmd->scp; - scp->result = (DID_IMM_RETRY << 16); - scsi_done(scp); - } else { - cmd->cmd_aborted = true; - - if (cmd->cmd_tmf) { - spin_lock_irqsave(&cfg->tmf_slock, lock_flags); - cfg->tmf_active = false; - wake_up_all_locked(&cfg->tmf_waitq); - spin_unlock_irqrestore(&cfg->tmf_slock, - lock_flags); - } else - complete(&cmd->cevent); - } - } -} - -/** - * context_reset() - reset context via specified register - * @hwq: Hardware queue owning the context to be reset. - * @reset_reg: MMIO register to perform reset. - * - * When the reset is successful, the SISLite specification guarantees that - * the AFU has aborted all currently pending I/O. Accordingly, these commands - * must be flushed. - * - * Return: 0 on success, -errno on failure - */ -static int context_reset(struct hwq *hwq, __be64 __iomem *reset_reg) -{ - struct cxlflash_cfg *cfg = hwq->afu->parent; - struct device *dev = &cfg->dev->dev; - int rc = -ETIMEDOUT; - int nretry = 0; - u64 val = 0x1; - ulong lock_flags; - - dev_dbg(dev, "%s: hwq=%p\n", __func__, hwq); - - spin_lock_irqsave(&hwq->hsq_slock, lock_flags); - - writeq_be(val, reset_reg); - do { - val = readq_be(reset_reg); - if ((val & 0x1) == 0x0) { - rc = 0; - break; - } - - /* Double delay each time */ - udelay(1 << nretry); - } while (nretry++ < MC_ROOM_RETRY_CNT); - - if (!rc) - flush_pending_cmds(hwq); - - spin_unlock_irqrestore(&hwq->hsq_slock, lock_flags); - - dev_dbg(dev, "%s: returning rc=%d, val=%016llx nretry=%d\n", - __func__, rc, val, nretry); - return rc; -} - -/** - * context_reset_ioarrin() - reset context via IOARRIN register - * @hwq: Hardware queue owning the context to be reset. - * - * Return: 0 on success, -errno on failure - */ -static int context_reset_ioarrin(struct hwq *hwq) -{ - return context_reset(hwq, &hwq->host_map->ioarrin); -} - -/** - * context_reset_sq() - reset context via SQ_CONTEXT_RESET register - * @hwq: Hardware queue owning the context to be reset. - * - * Return: 0 on success, -errno on failure - */ -static int context_reset_sq(struct hwq *hwq) -{ - return context_reset(hwq, &hwq->host_map->sq_ctx_reset); -} - -/** - * send_cmd_ioarrin() - sends an AFU command via IOARRIN register - * @afu: AFU associated with the host. - * @cmd: AFU command to send. - * - * Return: - * 0 on success, SCSI_MLQUEUE_HOST_BUSY on failure - */ -static int send_cmd_ioarrin(struct afu *afu, struct afu_cmd *cmd) -{ - struct cxlflash_cfg *cfg = afu->parent; - struct device *dev = &cfg->dev->dev; - struct hwq *hwq = get_hwq(afu, cmd->hwq_index); - int rc = 0; - s64 room; - ulong lock_flags; - - /* - * To avoid the performance penalty of MMIO, spread the update of - * 'room' over multiple commands. - */ - spin_lock_irqsave(&hwq->hsq_slock, lock_flags); - if (--hwq->room < 0) { - room = readq_be(&hwq->host_map->cmd_room); - if (room <= 0) { - dev_dbg_ratelimited(dev, "%s: no cmd_room to send " - "0x%02X, room=0x%016llX\n", - __func__, cmd->rcb.cdb[0], room); - hwq->room = 0; - rc = SCSI_MLQUEUE_HOST_BUSY; - goto out; - } - hwq->room = room - 1; - } - - list_add(&cmd->list, &hwq->pending_cmds); - writeq_be((u64)&cmd->rcb, &hwq->host_map->ioarrin); -out: - spin_unlock_irqrestore(&hwq->hsq_slock, lock_flags); - dev_dbg_ratelimited(dev, "%s: cmd=%p len=%u ea=%016llx rc=%d\n", - __func__, cmd, cmd->rcb.data_len, cmd->rcb.data_ea, rc); - return rc; -} - -/** - * send_cmd_sq() - sends an AFU command via SQ ring - * @afu: AFU associated with the host. - * @cmd: AFU command to send. - * - * Return: - * 0 on success, SCSI_MLQUEUE_HOST_BUSY on failure - */ -static int send_cmd_sq(struct afu *afu, struct afu_cmd *cmd) -{ - struct cxlflash_cfg *cfg = afu->parent; - struct device *dev = &cfg->dev->dev; - struct hwq *hwq = get_hwq(afu, cmd->hwq_index); - int rc = 0; - int newval; - ulong lock_flags; - - newval = atomic_dec_if_positive(&hwq->hsq_credits); - if (newval <= 0) { - rc = SCSI_MLQUEUE_HOST_BUSY; - goto out; - } - - cmd->rcb.ioasa = &cmd->sa; - - spin_lock_irqsave(&hwq->hsq_slock, lock_flags); - - *hwq->hsq_curr = cmd->rcb; - if (hwq->hsq_curr < hwq->hsq_end) - hwq->hsq_curr++; - else - hwq->hsq_curr = hwq->hsq_start; - - list_add(&cmd->list, &hwq->pending_cmds); - writeq_be((u64)hwq->hsq_curr, &hwq->host_map->sq_tail); - - spin_unlock_irqrestore(&hwq->hsq_slock, lock_flags); -out: - dev_dbg(dev, "%s: cmd=%p len=%u ea=%016llx ioasa=%p rc=%d curr=%p " - "head=%016llx tail=%016llx\n", __func__, cmd, cmd->rcb.data_len, - cmd->rcb.data_ea, cmd->rcb.ioasa, rc, hwq->hsq_curr, - readq_be(&hwq->host_map->sq_head), - readq_be(&hwq->host_map->sq_tail)); - return rc; -} - -/** - * wait_resp() - polls for a response or timeout to a sent AFU command - * @afu: AFU associated with the host. - * @cmd: AFU command that was sent. - * - * Return: 0 on success, -errno on failure - */ -static int wait_resp(struct afu *afu, struct afu_cmd *cmd) -{ - struct cxlflash_cfg *cfg = afu->parent; - struct device *dev = &cfg->dev->dev; - int rc = 0; - ulong timeout = msecs_to_jiffies(cmd->rcb.timeout * 2 * 1000); - - timeout = wait_for_completion_timeout(&cmd->cevent, timeout); - if (!timeout) - rc = -ETIMEDOUT; - - if (cmd->cmd_aborted) - rc = -EAGAIN; - - if (unlikely(cmd->sa.ioasc != 0)) { - dev_err(dev, "%s: cmd %02x failed, ioasc=%08x\n", - __func__, cmd->rcb.cdb[0], cmd->sa.ioasc); - rc = -EIO; - } - - return rc; -} - -/** - * cmd_to_target_hwq() - selects a target hardware queue for a SCSI command - * @host: SCSI host associated with device. - * @scp: SCSI command to send. - * @afu: SCSI command to send. - * - * Hashes a command based upon the hardware queue mode. - * - * Return: Trusted index of target hardware queue - */ -static u32 cmd_to_target_hwq(struct Scsi_Host *host, struct scsi_cmnd *scp, - struct afu *afu) -{ - u32 tag; - u32 hwq = 0; - - if (afu->num_hwqs == 1) - return 0; - - switch (afu->hwq_mode) { - case HWQ_MODE_RR: - hwq = afu->hwq_rr_count++ % afu->num_hwqs; - break; - case HWQ_MODE_TAG: - tag = blk_mq_unique_tag(scsi_cmd_to_rq(scp)); - hwq = blk_mq_unique_tag_to_hwq(tag); - break; - case HWQ_MODE_CPU: - hwq = smp_processor_id() % afu->num_hwqs; - break; - default: - WARN_ON_ONCE(1); - } - - return hwq; -} - -/** - * send_tmf() - sends a Task Management Function (TMF) - * @cfg: Internal structure associated with the host. - * @sdev: SCSI device destined for TMF. - * @tmfcmd: TMF command to send. - * - * Return: - * 0 on success, SCSI_MLQUEUE_HOST_BUSY or -errno on failure - */ -static int send_tmf(struct cxlflash_cfg *cfg, struct scsi_device *sdev, - u64 tmfcmd) -{ - struct afu *afu = cfg->afu; - struct afu_cmd *cmd = NULL; - struct device *dev = &cfg->dev->dev; - struct hwq *hwq = get_hwq(afu, PRIMARY_HWQ); - bool needs_deletion = false; - char *buf = NULL; - ulong lock_flags; - int rc = 0; - ulong to; - - buf = kzalloc(sizeof(*cmd) + __alignof__(*cmd) - 1, GFP_KERNEL); - if (unlikely(!buf)) { - dev_err(dev, "%s: no memory for command\n", __func__); - rc = -ENOMEM; - goto out; - } - - cmd = (struct afu_cmd *)PTR_ALIGN(buf, __alignof__(*cmd)); - INIT_LIST_HEAD(&cmd->queue); - - /* When Task Management Function is active do not send another */ - spin_lock_irqsave(&cfg->tmf_slock, lock_flags); - if (cfg->tmf_active) - wait_event_interruptible_lock_irq(cfg->tmf_waitq, - !cfg->tmf_active, - cfg->tmf_slock); - cfg->tmf_active = true; - spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags); - - cmd->parent = afu; - cmd->cmd_tmf = true; - cmd->hwq_index = hwq->index; - - cmd->rcb.ctx_id = hwq->ctx_hndl; - cmd->rcb.msi = SISL_MSI_RRQ_UPDATED; - cmd->rcb.port_sel = CHAN2PORTMASK(sdev->channel); - cmd->rcb.lun_id = lun_to_lunid(sdev->lun); - cmd->rcb.req_flags = (SISL_REQ_FLAGS_PORT_LUN_ID | - SISL_REQ_FLAGS_SUP_UNDERRUN | - SISL_REQ_FLAGS_TMF_CMD); - memcpy(cmd->rcb.cdb, &tmfcmd, sizeof(tmfcmd)); - - rc = afu->send_cmd(afu, cmd); - if (unlikely(rc)) { - spin_lock_irqsave(&cfg->tmf_slock, lock_flags); - cfg->tmf_active = false; - spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags); - goto out; - } - - spin_lock_irqsave(&cfg->tmf_slock, lock_flags); - to = msecs_to_jiffies(5000); - to = wait_event_interruptible_lock_irq_timeout(cfg->tmf_waitq, - !cfg->tmf_active, - cfg->tmf_slock, - to); - if (!to) { - dev_err(dev, "%s: TMF timed out\n", __func__); - rc = -ETIMEDOUT; - needs_deletion = true; - } else if (cmd->cmd_aborted) { - dev_err(dev, "%s: TMF aborted\n", __func__); - rc = -EAGAIN; - } else if (cmd->sa.ioasc) { - dev_err(dev, "%s: TMF failed ioasc=%08x\n", - __func__, cmd->sa.ioasc); - rc = -EIO; - } - cfg->tmf_active = false; - spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags); - - if (needs_deletion) { - spin_lock_irqsave(&hwq->hsq_slock, lock_flags); - list_del(&cmd->list); - spin_unlock_irqrestore(&hwq->hsq_slock, lock_flags); - } -out: - kfree(buf); - return rc; -} - -/** - * cxlflash_driver_info() - information handler for this host driver - * @host: SCSI host associated with device. - * - * Return: A string describing the device. - */ -static const char *cxlflash_driver_info(struct Scsi_Host *host) -{ - return CXLFLASH_ADAPTER_NAME; -} - -/** - * cxlflash_queuecommand() - sends a mid-layer request - * @host: SCSI host associated with device. - * @scp: SCSI command to send. - * - * Return: 0 on success, SCSI_MLQUEUE_HOST_BUSY on failure - */ -static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp) -{ - struct cxlflash_cfg *cfg = shost_priv(host); - struct afu *afu = cfg->afu; - struct device *dev = &cfg->dev->dev; - struct afu_cmd *cmd = sc_to_afuci(scp); - struct scatterlist *sg = scsi_sglist(scp); - int hwq_index = cmd_to_target_hwq(host, scp, afu); - struct hwq *hwq = get_hwq(afu, hwq_index); - u16 req_flags = SISL_REQ_FLAGS_SUP_UNDERRUN; - ulong lock_flags; - int rc = 0; - - dev_dbg_ratelimited(dev, "%s: (scp=%p) %d/%d/%d/%llu " - "cdb=(%08x-%08x-%08x-%08x)\n", - __func__, scp, host->host_no, scp->device->channel, - scp->device->id, scp->device->lun, - get_unaligned_be32(&((u32 *)scp->cmnd)[0]), - get_unaligned_be32(&((u32 *)scp->cmnd)[1]), - get_unaligned_be32(&((u32 *)scp->cmnd)[2]), - get_unaligned_be32(&((u32 *)scp->cmnd)[3])); - - /* - * If a Task Management Function is active, wait for it to complete - * before continuing with regular commands. - */ - spin_lock_irqsave(&cfg->tmf_slock, lock_flags); - if (cfg->tmf_active) { - spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags); - rc = SCSI_MLQUEUE_HOST_BUSY; - goto out; - } - spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags); - - switch (cfg->state) { - case STATE_PROBING: - case STATE_PROBED: - case STATE_RESET: - dev_dbg_ratelimited(dev, "%s: device is in reset\n", __func__); - rc = SCSI_MLQUEUE_HOST_BUSY; - goto out; - case STATE_FAILTERM: - dev_dbg_ratelimited(dev, "%s: device has failed\n", __func__); - scp->result = (DID_NO_CONNECT << 16); - scsi_done(scp); - rc = 0; - goto out; - default: - atomic_inc(&afu->cmds_active); - break; - } - - if (likely(sg)) { - cmd->rcb.data_len = sg->length; - cmd->rcb.data_ea = (uintptr_t)sg_virt(sg); - } - - cmd->scp = scp; - cmd->parent = afu; - cmd->hwq_index = hwq_index; - - cmd->sa.ioasc = 0; - cmd->rcb.ctx_id = hwq->ctx_hndl; - cmd->rcb.msi = SISL_MSI_RRQ_UPDATED; - cmd->rcb.port_sel = CHAN2PORTMASK(scp->device->channel); - cmd->rcb.lun_id = lun_to_lunid(scp->device->lun); - - if (scp->sc_data_direction == DMA_TO_DEVICE) - req_flags |= SISL_REQ_FLAGS_HOST_WRITE; - - cmd->rcb.req_flags = req_flags; - memcpy(cmd->rcb.cdb, scp->cmnd, sizeof(cmd->rcb.cdb)); - - rc = afu->send_cmd(afu, cmd); - atomic_dec(&afu->cmds_active); -out: - return rc; -} - -/** - * cxlflash_wait_for_pci_err_recovery() - wait for error recovery during probe - * @cfg: Internal structure associated with the host. - */ -static void cxlflash_wait_for_pci_err_recovery(struct cxlflash_cfg *cfg) -{ - struct pci_dev *pdev = cfg->dev; - - if (pci_channel_offline(pdev)) - wait_event_timeout(cfg->reset_waitq, - !pci_channel_offline(pdev), - CXLFLASH_PCI_ERROR_RECOVERY_TIMEOUT); -} - -/** - * free_mem() - free memory associated with the AFU - * @cfg: Internal structure associated with the host. - */ -static void free_mem(struct cxlflash_cfg *cfg) -{ - struct afu *afu = cfg->afu; - - if (cfg->afu) { - free_pages((ulong)afu, get_order(sizeof(struct afu))); - cfg->afu = NULL; - } -} - -/** - * cxlflash_reset_sync() - synchronizing point for asynchronous resets - * @cfg: Internal structure associated with the host. - */ -static void cxlflash_reset_sync(struct cxlflash_cfg *cfg) -{ - if (cfg->async_reset_cookie == 0) - return; - - /* Wait until all async calls prior to this cookie have completed */ - async_synchronize_cookie(cfg->async_reset_cookie + 1); - cfg->async_reset_cookie = 0; -} - -/** - * stop_afu() - stops the AFU command timers and unmaps the MMIO space - * @cfg: Internal structure associated with the host. - * - * Safe to call with AFU in a partially allocated/initialized state. - * - * Cancels scheduled worker threads, waits for any active internal AFU - * commands to timeout, disables IRQ polling and then unmaps the MMIO space. - */ -static void stop_afu(struct cxlflash_cfg *cfg) -{ - struct afu *afu = cfg->afu; - struct hwq *hwq; - int i; - - cancel_work_sync(&cfg->work_q); - if (!current_is_async()) - cxlflash_reset_sync(cfg); - - if (likely(afu)) { - while (atomic_read(&afu->cmds_active)) - ssleep(1); - - if (afu_is_irqpoll_enabled(afu)) { - for (i = 0; i < afu->num_hwqs; i++) { - hwq = get_hwq(afu, i); - - irq_poll_disable(&hwq->irqpoll); - } - } - - if (likely(afu->afu_map)) { - cfg->ops->psa_unmap(afu->afu_map); - afu->afu_map = NULL; - } - } -} - -/** - * term_intr() - disables all AFU interrupts - * @cfg: Internal structure associated with the host. - * @level: Depth of allocation, where to begin waterfall tear down. - * @index: Index of the hardware queue. - * - * Safe to call with AFU/MC in partially allocated/initialized state. - */ -static void term_intr(struct cxlflash_cfg *cfg, enum undo_level level, - u32 index) -{ - struct afu *afu = cfg->afu; - struct device *dev = &cfg->dev->dev; - struct hwq *hwq; - - if (!afu) { - dev_err(dev, "%s: returning with NULL afu\n", __func__); - return; - } - - hwq = get_hwq(afu, index); - - if (!hwq->ctx_cookie) { - dev_err(dev, "%s: returning with NULL MC\n", __func__); - return; - } - - switch (level) { - case UNMAP_THREE: - /* SISL_MSI_ASYNC_ERROR is setup only for the primary HWQ */ - if (index == PRIMARY_HWQ) - cfg->ops->unmap_afu_irq(hwq->ctx_cookie, 3, hwq); - fallthrough; - case UNMAP_TWO: - cfg->ops->unmap_afu_irq(hwq->ctx_cookie, 2, hwq); - fallthrough; - case UNMAP_ONE: - cfg->ops->unmap_afu_irq(hwq->ctx_cookie, 1, hwq); - fallthrough; - case FREE_IRQ: - cfg->ops->free_afu_irqs(hwq->ctx_cookie); - fallthrough; - case UNDO_NOOP: - /* No action required */ - break; - } -} - -/** - * term_mc() - terminates the master context - * @cfg: Internal structure associated with the host. - * @index: Index of the hardware queue. - * - * Safe to call with AFU/MC in partially allocated/initialized state. - */ -static void term_mc(struct cxlflash_cfg *cfg, u32 index) -{ - struct afu *afu = cfg->afu; - struct device *dev = &cfg->dev->dev; - struct hwq *hwq; - ulong lock_flags; - - if (!afu) { - dev_err(dev, "%s: returning with NULL afu\n", __func__); - return; - } - - hwq = get_hwq(afu, index); - - if (!hwq->ctx_cookie) { - dev_err(dev, "%s: returning with NULL MC\n", __func__); - return; - } - - WARN_ON(cfg->ops->stop_context(hwq->ctx_cookie)); - if (index != PRIMARY_HWQ) - WARN_ON(cfg->ops->release_context(hwq->ctx_cookie)); - hwq->ctx_cookie = NULL; - - spin_lock_irqsave(&hwq->hrrq_slock, lock_flags); - hwq->hrrq_online = false; - spin_unlock_irqrestore(&hwq->hrrq_slock, lock_flags); - - spin_lock_irqsave(&hwq->hsq_slock, lock_flags); - flush_pending_cmds(hwq); - spin_unlock_irqrestore(&hwq->hsq_slock, lock_flags); -} - -/** - * term_afu() - terminates the AFU - * @cfg: Internal structure associated with the host. - * - * Safe to call with AFU/MC in partially allocated/initialized state. - */ -static void term_afu(struct cxlflash_cfg *cfg) -{ - struct device *dev = &cfg->dev->dev; - int k; - - /* - * Tear down is carefully orchestrated to ensure - * no interrupts can come in when the problem state - * area is unmapped. - * - * 1) Disable all AFU interrupts for each master - * 2) Unmap the problem state area - * 3) Stop each master context - */ - for (k = cfg->afu->num_hwqs - 1; k >= 0; k--) - term_intr(cfg, UNMAP_THREE, k); - - stop_afu(cfg); - - for (k = cfg->afu->num_hwqs - 1; k >= 0; k--) - term_mc(cfg, k); - - dev_dbg(dev, "%s: returning\n", __func__); -} - -/** - * notify_shutdown() - notifies device of pending shutdown - * @cfg: Internal structure associated with the host. - * @wait: Whether to wait for shutdown processing to complete. - * - * This function will notify the AFU that the adapter is being shutdown - * and will wait for shutdown processing to complete if wait is true. - * This notification should flush pending I/Os to the device and halt - * further I/Os until the next AFU reset is issued and device restarted. - */ -static void notify_shutdown(struct cxlflash_cfg *cfg, bool wait) -{ - struct afu *afu = cfg->afu; - struct device *dev = &cfg->dev->dev; - struct dev_dependent_vals *ddv; - __be64 __iomem *fc_port_regs; - u64 reg, status; - int i, retry_cnt = 0; - - ddv = (struct dev_dependent_vals *)cfg->dev_id->driver_data; - if (!(ddv->flags & CXLFLASH_NOTIFY_SHUTDOWN)) - return; - - if (!afu || !afu->afu_map) { - dev_dbg(dev, "%s: Problem state area not mapped\n", __func__); - return; - } - - /* Notify AFU */ - for (i = 0; i < cfg->num_fc_ports; i++) { - fc_port_regs = get_fc_port_regs(cfg, i); - - reg = readq_be(&fc_port_regs[FC_CONFIG2 / 8]); - reg |= SISL_FC_SHUTDOWN_NORMAL; - writeq_be(reg, &fc_port_regs[FC_CONFIG2 / 8]); - } - - if (!wait) - return; - - /* Wait up to 1.5 seconds for shutdown processing to complete */ - for (i = 0; i < cfg->num_fc_ports; i++) { - fc_port_regs = get_fc_port_regs(cfg, i); - retry_cnt = 0; - - while (true) { - status = readq_be(&fc_port_regs[FC_STATUS / 8]); - if (status & SISL_STATUS_SHUTDOWN_COMPLETE) - break; - if (++retry_cnt >= MC_RETRY_CNT) { - dev_dbg(dev, "%s: port %d shutdown processing " - "not yet completed\n", __func__, i); - break; - } - msleep(100 * retry_cnt); - } - } -} - -/** - * cxlflash_get_minor() - gets the first available minor number - * - * Return: Unique minor number that can be used to create the character device. - */ -static int cxlflash_get_minor(void) -{ - int minor; - long bit; - - bit = find_first_zero_bit(cxlflash_minor, CXLFLASH_MAX_ADAPTERS); - if (bit >= CXLFLASH_MAX_ADAPTERS) - return -1; - - minor = bit & MINORMASK; - set_bit(minor, cxlflash_minor); - return minor; -} - -/** - * cxlflash_put_minor() - releases the minor number - * @minor: Minor number that is no longer needed. - */ -static void cxlflash_put_minor(int minor) -{ - clear_bit(minor, cxlflash_minor); -} - -/** - * cxlflash_release_chrdev() - release the character device for the host - * @cfg: Internal structure associated with the host. - */ -static void cxlflash_release_chrdev(struct cxlflash_cfg *cfg) -{ - device_unregister(cfg->chardev); - cfg->chardev = NULL; - cdev_del(&cfg->cdev); - cxlflash_put_minor(MINOR(cfg->cdev.dev)); -} - -/** - * cxlflash_remove() - PCI entry point to tear down host - * @pdev: PCI device associated with the host. - * - * Safe to use as a cleanup in partially allocated/initialized state. Note that - * the reset_waitq is flushed as part of the stop/termination of user contexts. - */ -static void cxlflash_remove(struct pci_dev *pdev) -{ - struct cxlflash_cfg *cfg = pci_get_drvdata(pdev); - struct device *dev = &pdev->dev; - ulong lock_flags; - - if (!pci_is_enabled(pdev)) { - dev_dbg(dev, "%s: Device is disabled\n", __func__); - return; - } - - /* Yield to running recovery threads before continuing with remove */ - wait_event(cfg->reset_waitq, cfg->state != STATE_RESET && - cfg->state != STATE_PROBING); - spin_lock_irqsave(&cfg->tmf_slock, lock_flags); - if (cfg->tmf_active) - wait_event_interruptible_lock_irq(cfg->tmf_waitq, - !cfg->tmf_active, - cfg->tmf_slock); - spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags); - - /* Notify AFU and wait for shutdown processing to complete */ - notify_shutdown(cfg, true); - - cfg->state = STATE_FAILTERM; - cxlflash_stop_term_user_contexts(cfg); - - switch (cfg->init_state) { - case INIT_STATE_CDEV: - cxlflash_release_chrdev(cfg); - fallthrough; - case INIT_STATE_SCSI: - cxlflash_term_local_luns(cfg); - scsi_remove_host(cfg->host); - fallthrough; - case INIT_STATE_AFU: - term_afu(cfg); - fallthrough; - case INIT_STATE_PCI: - cfg->ops->destroy_afu(cfg->afu_cookie); - pci_disable_device(pdev); - fallthrough; - case INIT_STATE_NONE: - free_mem(cfg); - scsi_host_put(cfg->host); - break; - } - - dev_dbg(dev, "%s: returning\n", __func__); -} - -/** - * alloc_mem() - allocates the AFU and its command pool - * @cfg: Internal structure associated with the host. - * - * A partially allocated state remains on failure. - * - * Return: - * 0 on success - * -ENOMEM on failure to allocate memory - */ -static int alloc_mem(struct cxlflash_cfg *cfg) -{ - int rc = 0; - struct device *dev = &cfg->dev->dev; - - /* AFU is ~28k, i.e. only one 64k page or up to seven 4k pages */ - cfg->afu = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(sizeof(struct afu))); - if (unlikely(!cfg->afu)) { - dev_err(dev, "%s: cannot get %d free pages\n", - __func__, get_order(sizeof(struct afu))); - rc = -ENOMEM; - goto out; - } - cfg->afu->parent = cfg; - cfg->afu->desired_hwqs = CXLFLASH_DEF_HWQS; - cfg->afu->afu_map = NULL; -out: - return rc; -} - -/** - * init_pci() - initializes the host as a PCI device - * @cfg: Internal structure associated with the host. - * - * Return: 0 on success, -errno on failure - */ -static int init_pci(struct cxlflash_cfg *cfg) -{ - struct pci_dev *pdev = cfg->dev; - struct device *dev = &cfg->dev->dev; - int rc = 0; - - rc = pci_enable_device(pdev); - if (rc || pci_channel_offline(pdev)) { - if (pci_channel_offline(pdev)) { - cxlflash_wait_for_pci_err_recovery(cfg); - rc = pci_enable_device(pdev); - } - - if (rc) { - dev_err(dev, "%s: Cannot enable adapter\n", __func__); - cxlflash_wait_for_pci_err_recovery(cfg); - goto out; - } - } - -out: - dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc); - return rc; -} - -/** - * init_scsi() - adds the host to the SCSI stack and kicks off host scan - * @cfg: Internal structure associated with the host. - * - * Return: 0 on success, -errno on failure - */ -static int init_scsi(struct cxlflash_cfg *cfg) -{ - struct pci_dev *pdev = cfg->dev; - struct device *dev = &cfg->dev->dev; - int rc = 0; - - rc = scsi_add_host(cfg->host, &pdev->dev); - if (rc) { - dev_err(dev, "%s: scsi_add_host failed rc=%d\n", __func__, rc); - goto out; - } - - scsi_scan_host(cfg->host); - -out: - dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc); - return rc; -} - -/** - * set_port_online() - transitions the specified host FC port to online state - * @fc_regs: Top of MMIO region defined for specified port. - * - * The provided MMIO region must be mapped prior to call. Online state means - * that the FC link layer has synced, completed the handshaking process, and - * is ready for login to start. - */ -static void set_port_online(__be64 __iomem *fc_regs) -{ - u64 cmdcfg; - - cmdcfg = readq_be(&fc_regs[FC_MTIP_CMDCONFIG / 8]); - cmdcfg &= (~FC_MTIP_CMDCONFIG_OFFLINE); /* clear OFF_LINE */ - cmdcfg |= (FC_MTIP_CMDCONFIG_ONLINE); /* set ON_LINE */ - writeq_be(cmdcfg, &fc_regs[FC_MTIP_CMDCONFIG / 8]); -} - -/** - * set_port_offline() - transitions the specified host FC port to offline state - * @fc_regs: Top of MMIO region defined for specified port. - * - * The provided MMIO region must be mapped prior to call. - */ -static void set_port_offline(__be64 __iomem *fc_regs) -{ - u64 cmdcfg; - - cmdcfg = readq_be(&fc_regs[FC_MTIP_CMDCONFIG / 8]); - cmdcfg &= (~FC_MTIP_CMDCONFIG_ONLINE); /* clear ON_LINE */ - cmdcfg |= (FC_MTIP_CMDCONFIG_OFFLINE); /* set OFF_LINE */ - writeq_be(cmdcfg, &fc_regs[FC_MTIP_CMDCONFIG / 8]); -} - -/** - * wait_port_online() - waits for the specified host FC port come online - * @fc_regs: Top of MMIO region defined for specified port. - * @delay_us: Number of microseconds to delay between reading port status. - * @nretry: Number of cycles to retry reading port status. - * - * The provided MMIO region must be mapped prior to call. This will timeout - * when the cable is not plugged in. - * - * Return: - * TRUE (1) when the specified port is online - * FALSE (0) when the specified port fails to come online after timeout - */ -static bool wait_port_online(__be64 __iomem *fc_regs, u32 delay_us, u32 nretry) -{ - u64 status; - - WARN_ON(delay_us < 1000); - - do { - msleep(delay_us / 1000); - status = readq_be(&fc_regs[FC_MTIP_STATUS / 8]); - if (status == U64_MAX) - nretry /= 2; - } while ((status & FC_MTIP_STATUS_MASK) != FC_MTIP_STATUS_ONLINE && - nretry--); - - return ((status & FC_MTIP_STATUS_MASK) == FC_MTIP_STATUS_ONLINE); -} - -/** - * wait_port_offline() - waits for the specified host FC port go offline - * @fc_regs: Top of MMIO region defined for specified port. - * @delay_us: Number of microseconds to delay between reading port status. - * @nretry: Number of cycles to retry reading port status. - * - * The provided MMIO region must be mapped prior to call. - * - * Return: - * TRUE (1) when the specified port is offline - * FALSE (0) when the specified port fails to go offline after timeout - */ -static bool wait_port_offline(__be64 __iomem *fc_regs, u32 delay_us, u32 nretry) -{ - u64 status; - - WARN_ON(delay_us < 1000); - - do { - msleep(delay_us / 1000); - status = readq_be(&fc_regs[FC_MTIP_STATUS / 8]); - if (status == U64_MAX) - nretry /= 2; - } while ((status & FC_MTIP_STATUS_MASK) != FC_MTIP_STATUS_OFFLINE && - nretry--); - - return ((status & FC_MTIP_STATUS_MASK) == FC_MTIP_STATUS_OFFLINE); -} - -/** - * afu_set_wwpn() - configures the WWPN for the specified host FC port - * @afu: AFU associated with the host that owns the specified FC port. - * @port: Port number being configured. - * @fc_regs: Top of MMIO region defined for specified port. - * @wwpn: The world-wide-port-number previously discovered for port. - * - * The provided MMIO region must be mapped prior to call. As part of the - * sequence to configure the WWPN, the port is toggled offline and then back - * online. This toggling action can cause this routine to delay up to a few - * seconds. When configured to use the internal LUN feature of the AFU, a - * failure to come online is overridden. - */ -static void afu_set_wwpn(struct afu *afu, int port, __be64 __iomem *fc_regs, - u64 wwpn) -{ - struct cxlflash_cfg *cfg = afu->parent; - struct device *dev = &cfg->dev->dev; - - set_port_offline(fc_regs); - if (!wait_port_offline(fc_regs, FC_PORT_STATUS_RETRY_INTERVAL_US, - FC_PORT_STATUS_RETRY_CNT)) { - dev_dbg(dev, "%s: wait on port %d to go offline timed out\n", - __func__, port); - } - - writeq_be(wwpn, &fc_regs[FC_PNAME / 8]); - - set_port_online(fc_regs); - if (!wait_port_online(fc_regs, FC_PORT_STATUS_RETRY_INTERVAL_US, - FC_PORT_STATUS_RETRY_CNT)) { - dev_dbg(dev, "%s: wait on port %d to go online timed out\n", - __func__, port); - } -} - -/** - * afu_link_reset() - resets the specified host FC port - * @afu: AFU associated with the host that owns the specified FC port. - * @port: Port number being configured. - * @fc_regs: Top of MMIO region defined for specified port. - * - * The provided MMIO region must be mapped prior to call. The sequence to - * reset the port involves toggling it offline and then back online. This - * action can cause this routine to delay up to a few seconds. An effort - * is made to maintain link with the device by switching to host to use - * the alternate port exclusively while the reset takes place. - * failure to come online is overridden. - */ -static void afu_link_reset(struct afu *afu, int port, __be64 __iomem *fc_regs) -{ - struct cxlflash_cfg *cfg = afu->parent; - struct device *dev = &cfg->dev->dev; - u64 port_sel; - - /* first switch the AFU to the other links, if any */ - port_sel = readq_be(&afu->afu_map->global.regs.afu_port_sel); - port_sel &= ~(1ULL << port); - writeq_be(port_sel, &afu->afu_map->global.regs.afu_port_sel); - cxlflash_afu_sync(afu, 0, 0, AFU_GSYNC); - - set_port_offline(fc_regs); - if (!wait_port_offline(fc_regs, FC_PORT_STATUS_RETRY_INTERVAL_US, - FC_PORT_STATUS_RETRY_CNT)) - dev_err(dev, "%s: wait on port %d to go offline timed out\n", - __func__, port); - - set_port_online(fc_regs); - if (!wait_port_online(fc_regs, FC_PORT_STATUS_RETRY_INTERVAL_US, - FC_PORT_STATUS_RETRY_CNT)) - dev_err(dev, "%s: wait on port %d to go online timed out\n", - __func__, port); - - /* switch back to include this port */ - port_sel |= (1ULL << port); - writeq_be(port_sel, &afu->afu_map->global.regs.afu_port_sel); - cxlflash_afu_sync(afu, 0, 0, AFU_GSYNC); - - dev_dbg(dev, "%s: returning port_sel=%016llx\n", __func__, port_sel); -} - -/** - * afu_err_intr_init() - clears and initializes the AFU for error interrupts - * @afu: AFU associated with the host. - */ -static void afu_err_intr_init(struct afu *afu) -{ - struct cxlflash_cfg *cfg = afu->parent; - __be64 __iomem *fc_port_regs; - int i; - struct hwq *hwq = get_hwq(afu, PRIMARY_HWQ); - u64 reg; - - /* global async interrupts: AFU clears afu_ctrl on context exit - * if async interrupts were sent to that context. This prevents - * the AFU form sending further async interrupts when - * there is - * nobody to receive them. - */ - - /* mask all */ - writeq_be(-1ULL, &afu->afu_map->global.regs.aintr_mask); - /* set LISN# to send and point to primary master context */ - reg = ((u64) (((hwq->ctx_hndl << 8) | SISL_MSI_ASYNC_ERROR)) << 40); - - if (afu->internal_lun) - reg |= 1; /* Bit 63 indicates local lun */ - writeq_be(reg, &afu->afu_map->global.regs.afu_ctrl); - /* clear all */ - writeq_be(-1ULL, &afu->afu_map->global.regs.aintr_clear); - /* unmask bits that are of interest */ - /* note: afu can send an interrupt after this step */ - writeq_be(SISL_ASTATUS_MASK, &afu->afu_map->global.regs.aintr_mask); - /* clear again in case a bit came on after previous clear but before */ - /* unmask */ - writeq_be(-1ULL, &afu->afu_map->global.regs.aintr_clear); - - /* Clear/Set internal lun bits */ - fc_port_regs = get_fc_port_regs(cfg, 0); - reg = readq_be(&fc_port_regs[FC_CONFIG2 / 8]); - reg &= SISL_FC_INTERNAL_MASK; - if (afu->internal_lun) - reg |= ((u64)(afu->internal_lun - 1) << SISL_FC_INTERNAL_SHIFT); - writeq_be(reg, &fc_port_regs[FC_CONFIG2 / 8]); - - /* now clear FC errors */ - for (i = 0; i < cfg->num_fc_ports; i++) { - fc_port_regs = get_fc_port_regs(cfg, i); - - writeq_be(0xFFFFFFFFU, &fc_port_regs[FC_ERROR / 8]); - writeq_be(0, &fc_port_regs[FC_ERRCAP / 8]); - } - - /* sync interrupts for master's IOARRIN write */ - /* note that unlike asyncs, there can be no pending sync interrupts */ - /* at this time (this is a fresh context and master has not written */ - /* IOARRIN yet), so there is nothing to clear. */ - - /* set LISN#, it is always sent to the context that wrote IOARRIN */ - for (i = 0; i < afu->num_hwqs; i++) { - hwq = get_hwq(afu, i); - - reg = readq_be(&hwq->host_map->ctx_ctrl); - WARN_ON((reg & SISL_CTX_CTRL_LISN_MASK) != 0); - reg |= SISL_MSI_SYNC_ERROR; - writeq_be(reg, &hwq->host_map->ctx_ctrl); - writeq_be(SISL_ISTATUS_MASK, &hwq->host_map->intr_mask); - } -} - -/** - * cxlflash_sync_err_irq() - interrupt handler for synchronous errors - * @irq: Interrupt number. - * @data: Private data provided at interrupt registration, the AFU. - * - * Return: Always return IRQ_HANDLED. - */ -static irqreturn_t cxlflash_sync_err_irq(int irq, void *data) -{ - struct hwq *hwq = (struct hwq *)data; - struct cxlflash_cfg *cfg = hwq->afu->parent; - struct device *dev = &cfg->dev->dev; - u64 reg; - u64 reg_unmasked; - - reg = readq_be(&hwq->host_map->intr_status); - reg_unmasked = (reg & SISL_ISTATUS_UNMASK); - - if (reg_unmasked == 0UL) { - dev_err(dev, "%s: spurious interrupt, intr_status=%016llx\n", - __func__, reg); - goto cxlflash_sync_err_irq_exit; - } - - dev_err(dev, "%s: unexpected interrupt, intr_status=%016llx\n", - __func__, reg); - - writeq_be(reg_unmasked, &hwq->host_map->intr_clear); - -cxlflash_sync_err_irq_exit: - return IRQ_HANDLED; -} - -/** - * process_hrrq() - process the read-response queue - * @hwq: HWQ associated with the host. - * @doneq: Queue of commands harvested from the RRQ. - * @budget: Threshold of RRQ entries to process. - * - * This routine must be called holding the disabled RRQ spin lock. - * - * Return: The number of entries processed. - */ -static int process_hrrq(struct hwq *hwq, struct list_head *doneq, int budget) -{ - struct afu *afu = hwq->afu; - struct afu_cmd *cmd; - struct sisl_ioasa *ioasa; - struct sisl_ioarcb *ioarcb; - bool toggle = hwq->toggle; - int num_hrrq = 0; - u64 entry, - *hrrq_start = hwq->hrrq_start, - *hrrq_end = hwq->hrrq_end, - *hrrq_curr = hwq->hrrq_curr; - - /* Process ready RRQ entries up to the specified budget (if any) */ - while (true) { - entry = *hrrq_curr; - - if ((entry & SISL_RESP_HANDLE_T_BIT) != toggle) - break; - - entry &= ~SISL_RESP_HANDLE_T_BIT; - - if (afu_is_sq_cmd_mode(afu)) { - ioasa = (struct sisl_ioasa *)entry; - cmd = container_of(ioasa, struct afu_cmd, sa); - } else { - ioarcb = (struct sisl_ioarcb *)entry; - cmd = container_of(ioarcb, struct afu_cmd, rcb); - } - - list_add_tail(&cmd->queue, doneq); - - /* Advance to next entry or wrap and flip the toggle bit */ - if (hrrq_curr < hrrq_end) - hrrq_curr++; - else { - hrrq_curr = hrrq_start; - toggle ^= SISL_RESP_HANDLE_T_BIT; - } - - atomic_inc(&hwq->hsq_credits); - num_hrrq++; - - if (budget > 0 && num_hrrq >= budget) - break; - } - - hwq->hrrq_curr = hrrq_curr; - hwq->toggle = toggle; - - return num_hrrq; -} - -/** - * process_cmd_doneq() - process a queue of harvested RRQ commands - * @doneq: Queue of completed commands. - * - * Note that upon return the queue can no longer be trusted. - */ -static void process_cmd_doneq(struct list_head *doneq) -{ - struct afu_cmd *cmd, *tmp; - - WARN_ON(list_empty(doneq)); - - list_for_each_entry_safe(cmd, tmp, doneq, queue) - cmd_complete(cmd); -} - -/** - * cxlflash_irqpoll() - process a queue of harvested RRQ commands - * @irqpoll: IRQ poll structure associated with queue to poll. - * @budget: Threshold of RRQ entries to process per poll. - * - * Return: The number of entries processed. - */ -static int cxlflash_irqpoll(struct irq_poll *irqpoll, int budget) -{ - struct hwq *hwq = container_of(irqpoll, struct hwq, irqpoll); - unsigned long hrrq_flags; - LIST_HEAD(doneq); - int num_entries = 0; - - spin_lock_irqsave(&hwq->hrrq_slock, hrrq_flags); - - num_entries = process_hrrq(hwq, &doneq, budget); - if (num_entries < budget) - irq_poll_complete(irqpoll); - - spin_unlock_irqrestore(&hwq->hrrq_slock, hrrq_flags); - - process_cmd_doneq(&doneq); - return num_entries; -} - -/** - * cxlflash_rrq_irq() - interrupt handler for read-response queue (normal path) - * @irq: Interrupt number. - * @data: Private data provided at interrupt registration, the AFU. - * - * Return: IRQ_HANDLED or IRQ_NONE when no ready entries found. - */ -static irqreturn_t cxlflash_rrq_irq(int irq, void *data) -{ - struct hwq *hwq = (struct hwq *)data; - struct afu *afu = hwq->afu; - unsigned long hrrq_flags; - LIST_HEAD(doneq); - int num_entries = 0; - - spin_lock_irqsave(&hwq->hrrq_slock, hrrq_flags); - - /* Silently drop spurious interrupts when queue is not online */ - if (!hwq->hrrq_online) { - spin_unlock_irqrestore(&hwq->hrrq_slock, hrrq_flags); - return IRQ_HANDLED; - } - - if (afu_is_irqpoll_enabled(afu)) { - irq_poll_sched(&hwq->irqpoll); - spin_unlock_irqrestore(&hwq->hrrq_slock, hrrq_flags); - return IRQ_HANDLED; - } - - num_entries = process_hrrq(hwq, &doneq, -1); - spin_unlock_irqrestore(&hwq->hrrq_slock, hrrq_flags); - - if (num_entries == 0) - return IRQ_NONE; - - process_cmd_doneq(&doneq); - return IRQ_HANDLED; -} - -/* - * Asynchronous interrupt information table - * - * NOTE: - * - Order matters here as this array is indexed by bit position. - * - * - The checkpatch script considers the BUILD_SISL_ASTATUS_FC_PORT macro - * as complex and complains due to a lack of parentheses/braces. - */ -#define ASTATUS_FC(_a, _b, _c, _d) \ - { SISL_ASTATUS_FC##_a##_##_b, _c, _a, (_d) } - -#define BUILD_SISL_ASTATUS_FC_PORT(_a) \ - ASTATUS_FC(_a, LINK_UP, "link up", 0), \ - ASTATUS_FC(_a, LINK_DN, "link down", 0), \ - ASTATUS_FC(_a, LOGI_S, "login succeeded", SCAN_HOST), \ - ASTATUS_FC(_a, LOGI_F, "login failed", CLR_FC_ERROR), \ - ASTATUS_FC(_a, LOGI_R, "login timed out, retrying", LINK_RESET), \ - ASTATUS_FC(_a, CRC_T, "CRC threshold exceeded", LINK_RESET), \ - ASTATUS_FC(_a, LOGO, "target initiated LOGO", 0), \ - ASTATUS_FC(_a, OTHER, "other error", CLR_FC_ERROR | LINK_RESET) - -static const struct asyc_intr_info ainfo[] = { - BUILD_SISL_ASTATUS_FC_PORT(1), - BUILD_SISL_ASTATUS_FC_PORT(0), - BUILD_SISL_ASTATUS_FC_PORT(3), - BUILD_SISL_ASTATUS_FC_PORT(2) -}; - -/** - * cxlflash_async_err_irq() - interrupt handler for asynchronous errors - * @irq: Interrupt number. - * @data: Private data provided at interrupt registration, the AFU. - * - * Return: Always return IRQ_HANDLED. - */ -static irqreturn_t cxlflash_async_err_irq(int irq, void *data) -{ - struct hwq *hwq = (struct hwq *)data; - struct afu *afu = hwq->afu; - struct cxlflash_cfg *cfg = afu->parent; - struct device *dev = &cfg->dev->dev; - const struct asyc_intr_info *info; - struct sisl_global_map __iomem *global = &afu->afu_map->global; - __be64 __iomem *fc_port_regs; - u64 reg_unmasked; - u64 reg; - u64 bit; - u8 port; - - reg = readq_be(&global->regs.aintr_status); - reg_unmasked = (reg & SISL_ASTATUS_UNMASK); - - if (unlikely(reg_unmasked == 0)) { - dev_err(dev, "%s: spurious interrupt, aintr_status=%016llx\n", - __func__, reg); - goto out; - } - - /* FYI, it is 'okay' to clear AFU status before FC_ERROR */ - writeq_be(reg_unmasked, &global->regs.aintr_clear); - - /* Check each bit that is on */ - for_each_set_bit(bit, (ulong *)®_unmasked, BITS_PER_LONG) { - if (unlikely(bit >= ARRAY_SIZE(ainfo))) { - WARN_ON_ONCE(1); - continue; - } - - info = &ainfo[bit]; - if (unlikely(info->status != 1ULL << bit)) { - WARN_ON_ONCE(1); - continue; - } - - port = info->port; - fc_port_regs = get_fc_port_regs(cfg, port); - - dev_err(dev, "%s: FC Port %d -> %s, fc_status=%016llx\n", - __func__, port, info->desc, - readq_be(&fc_port_regs[FC_STATUS / 8])); - - /* - * Do link reset first, some OTHER errors will set FC_ERROR - * again if cleared before or w/o a reset - */ - if (info->action & LINK_RESET) { - dev_err(dev, "%s: FC Port %d: resetting link\n", - __func__, port); - cfg->lr_state = LINK_RESET_REQUIRED; - cfg->lr_port = port; - schedule_work(&cfg->work_q); - } - - if (info->action & CLR_FC_ERROR) { - reg = readq_be(&fc_port_regs[FC_ERROR / 8]); - - /* - * Since all errors are unmasked, FC_ERROR and FC_ERRCAP - * should be the same and tracing one is sufficient. - */ - - dev_err(dev, "%s: fc %d: clearing fc_error=%016llx\n", - __func__, port, reg); - - writeq_be(reg, &fc_port_regs[FC_ERROR / 8]); - writeq_be(0, &fc_port_regs[FC_ERRCAP / 8]); - } - - if (info->action & SCAN_HOST) { - atomic_inc(&cfg->scan_host_needed); - schedule_work(&cfg->work_q); - } - } - -out: - return IRQ_HANDLED; -} - -/** - * read_vpd() - obtains the WWPNs from VPD - * @cfg: Internal structure associated with the host. - * @wwpn: Array of size MAX_FC_PORTS to pass back WWPNs - * - * Return: 0 on success, -errno on failure - */ -static int read_vpd(struct cxlflash_cfg *cfg, u64 wwpn[]) -{ - struct device *dev = &cfg->dev->dev; - struct pci_dev *pdev = cfg->dev; - int i, k, rc = 0; - unsigned int kw_size; - ssize_t vpd_size; - char vpd_data[CXLFLASH_VPD_LEN]; - char tmp_buf[WWPN_BUF_LEN] = { 0 }; - const struct dev_dependent_vals *ddv = (struct dev_dependent_vals *) - cfg->dev_id->driver_data; - const bool wwpn_vpd_required = ddv->flags & CXLFLASH_WWPN_VPD_REQUIRED; - const char *wwpn_vpd_tags[MAX_FC_PORTS] = { "V5", "V6", "V7", "V8" }; - - /* Get the VPD data from the device */ - vpd_size = cfg->ops->read_adapter_vpd(pdev, vpd_data, sizeof(vpd_data)); - if (unlikely(vpd_size <= 0)) { - dev_err(dev, "%s: Unable to read VPD (size = %ld)\n", - __func__, vpd_size); - rc = -ENODEV; - goto out; - } - - /* - * Find the offset of the WWPN tag within the read only - * VPD data and validate the found field (partials are - * no good to us). Convert the ASCII data to an integer - * value. Note that we must copy to a temporary buffer - * because the conversion service requires that the ASCII - * string be terminated. - * - * Allow for WWPN not being found for all devices, setting - * the returned WWPN to zero when not found. Notify with a - * log error for cards that should have had WWPN keywords - * in the VPD - cards requiring WWPN will not have their - * ports programmed and operate in an undefined state. - */ - for (k = 0; k < cfg->num_fc_ports; k++) { - i = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size, - wwpn_vpd_tags[k], &kw_size); - if (i == -ENOENT) { - if (wwpn_vpd_required) - dev_err(dev, "%s: Port %d WWPN not found\n", - __func__, k); - wwpn[k] = 0ULL; - continue; - } - - if (i < 0 || kw_size != WWPN_LEN) { - dev_err(dev, "%s: Port %d WWPN incomplete or bad VPD\n", - __func__, k); - rc = -ENODEV; - goto out; - } - - memcpy(tmp_buf, &vpd_data[i], WWPN_LEN); - rc = kstrtoul(tmp_buf, WWPN_LEN, (ulong *)&wwpn[k]); - if (unlikely(rc)) { - dev_err(dev, "%s: WWPN conversion failed for port %d\n", - __func__, k); - rc = -ENODEV; - goto out; - } - - dev_dbg(dev, "%s: wwpn%d=%016llx\n", __func__, k, wwpn[k]); - } - -out: - dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc); - return rc; -} - -/** - * init_pcr() - initialize the provisioning and control registers - * @cfg: Internal structure associated with the host. - * - * Also sets up fast access to the mapped registers and initializes AFU - * command fields that never change. - */ -static void init_pcr(struct cxlflash_cfg *cfg) -{ - struct afu *afu = cfg->afu; - struct sisl_ctrl_map __iomem *ctrl_map; - struct hwq *hwq; - void *cookie; - int i; - - for (i = 0; i < MAX_CONTEXT; i++) { - ctrl_map = &afu->afu_map->ctrls[i].ctrl; - /* Disrupt any clients that could be running */ - /* e.g. clients that survived a master restart */ - writeq_be(0, &ctrl_map->rht_start); - writeq_be(0, &ctrl_map->rht_cnt_id); - writeq_be(0, &ctrl_map->ctx_cap); - } - - /* Copy frequently used fields into hwq */ - for (i = 0; i < afu->num_hwqs; i++) { - hwq = get_hwq(afu, i); - cookie = hwq->ctx_cookie; - - hwq->ctx_hndl = (u16) cfg->ops->process_element(cookie); - hwq->host_map = &afu->afu_map->hosts[hwq->ctx_hndl].host; - hwq->ctrl_map = &afu->afu_map->ctrls[hwq->ctx_hndl].ctrl; - - /* Program the Endian Control for the master context */ - writeq_be(SISL_ENDIAN_CTRL, &hwq->host_map->endian_ctrl); - } -} - -/** - * init_global() - initialize AFU global registers - * @cfg: Internal structure associated with the host. - */ -static int init_global(struct cxlflash_cfg *cfg) -{ - struct afu *afu = cfg->afu; - struct device *dev = &cfg->dev->dev; - struct hwq *hwq; - struct sisl_host_map __iomem *hmap; - __be64 __iomem *fc_port_regs; - u64 wwpn[MAX_FC_PORTS]; /* wwpn of AFU ports */ - int i = 0, num_ports = 0; - int rc = 0; - int j; - void *ctx; - u64 reg; - - rc = read_vpd(cfg, &wwpn[0]); - if (rc) { - dev_err(dev, "%s: could not read vpd rc=%d\n", __func__, rc); - goto out; - } - - /* Set up RRQ and SQ in HWQ for master issued cmds */ - for (i = 0; i < afu->num_hwqs; i++) { - hwq = get_hwq(afu, i); - hmap = hwq->host_map; - - writeq_be((u64) hwq->hrrq_start, &hmap->rrq_start); - writeq_be((u64) hwq->hrrq_end, &hmap->rrq_end); - hwq->hrrq_online = true; - - if (afu_is_sq_cmd_mode(afu)) { - writeq_be((u64)hwq->hsq_start, &hmap->sq_start); - writeq_be((u64)hwq->hsq_end, &hmap->sq_end); - } - } - - /* AFU configuration */ - reg = readq_be(&afu->afu_map->global.regs.afu_config); - reg |= SISL_AFUCONF_AR_ALL|SISL_AFUCONF_ENDIAN; - /* enable all auto retry options and control endianness */ - /* leave others at default: */ - /* CTX_CAP write protected, mbox_r does not clear on read and */ - /* checker on if dual afu */ - writeq_be(reg, &afu->afu_map->global.regs.afu_config); - - /* Global port select: select either port */ - if (afu->internal_lun) { - /* Only use port 0 */ - writeq_be(PORT0, &afu->afu_map->global.regs.afu_port_sel); - num_ports = 0; - } else { - writeq_be(PORT_MASK(cfg->num_fc_ports), - &afu->afu_map->global.regs.afu_port_sel); - num_ports = cfg->num_fc_ports; - } - - for (i = 0; i < num_ports; i++) { - fc_port_regs = get_fc_port_regs(cfg, i); - - /* Unmask all errors (but they are still masked at AFU) */ - writeq_be(0, &fc_port_regs[FC_ERRMSK / 8]); - /* Clear CRC error cnt & set a threshold */ - (void)readq_be(&fc_port_regs[FC_CNT_CRCERR / 8]); - writeq_be(MC_CRC_THRESH, &fc_port_regs[FC_CRC_THRESH / 8]); - - /* Set WWPNs. If already programmed, wwpn[i] is 0 */ - if (wwpn[i] != 0) - afu_set_wwpn(afu, i, &fc_port_regs[0], wwpn[i]); - /* Programming WWPN back to back causes additional - * offline/online transitions and a PLOGI - */ - msleep(100); - } - - if (afu_is_ocxl_lisn(afu)) { - /* Set up the LISN effective address for each master */ - for (i = 0; i < afu->num_hwqs; i++) { - hwq = get_hwq(afu, i); - ctx = hwq->ctx_cookie; - - for (j = 0; j < hwq->num_irqs; j++) { - reg = cfg->ops->get_irq_objhndl(ctx, j); - writeq_be(reg, &hwq->ctrl_map->lisn_ea[j]); - } - - reg = hwq->ctx_hndl; - writeq_be(SISL_LISN_PASID(reg, reg), - &hwq->ctrl_map->lisn_pasid[0]); - writeq_be(SISL_LISN_PASID(0UL, reg), - &hwq->ctrl_map->lisn_pasid[1]); - } - } - - /* Set up master's own CTX_CAP to allow real mode, host translation */ - /* tables, afu cmds and read/write GSCSI cmds. */ - /* First, unlock ctx_cap write by reading mbox */ - for (i = 0; i < afu->num_hwqs; i++) { - hwq = get_hwq(afu, i); - - (void)readq_be(&hwq->ctrl_map->mbox_r); /* unlock ctx_cap */ - writeq_be((SISL_CTX_CAP_REAL_MODE | SISL_CTX_CAP_HOST_XLATE | - SISL_CTX_CAP_READ_CMD | SISL_CTX_CAP_WRITE_CMD | - SISL_CTX_CAP_AFU_CMD | SISL_CTX_CAP_GSCSI_CMD), - &hwq->ctrl_map->ctx_cap); - } - - /* - * Determine write-same unmap support for host by evaluating the unmap - * sector support bit of the context control register associated with - * the primary hardware queue. Note that while this status is reflected - * in a context register, the outcome can be assumed to be host-wide. - */ - hwq = get_hwq(afu, PRIMARY_HWQ); - reg = readq_be(&hwq->host_map->ctx_ctrl); - if (reg & SISL_CTX_CTRL_UNMAP_SECTOR) - cfg->ws_unmap = true; - - /* Initialize heartbeat */ - afu->hb = readq_be(&afu->afu_map->global.regs.afu_hb); -out: - return rc; -} - -/** - * start_afu() - initializes and starts the AFU - * @cfg: Internal structure associated with the host. - */ -static int start_afu(struct cxlflash_cfg *cfg) -{ - struct afu *afu = cfg->afu; - struct device *dev = &cfg->dev->dev; - struct hwq *hwq; - int rc = 0; - int i; - - init_pcr(cfg); - - /* Initialize each HWQ */ - for (i = 0; i < afu->num_hwqs; i++) { - hwq = get_hwq(afu, i); - - /* After an AFU reset, RRQ entries are stale, clear them */ - memset(&hwq->rrq_entry, 0, sizeof(hwq->rrq_entry)); - - /* Initialize RRQ pointers */ - hwq->hrrq_start = &hwq->rrq_entry[0]; - hwq->hrrq_end = &hwq->rrq_entry[NUM_RRQ_ENTRY - 1]; - hwq->hrrq_curr = hwq->hrrq_start; - hwq->toggle = 1; - - /* Initialize spin locks */ - spin_lock_init(&hwq->hrrq_slock); - spin_lock_init(&hwq->hsq_slock); - - /* Initialize SQ */ - if (afu_is_sq_cmd_mode(afu)) { - memset(&hwq->sq, 0, sizeof(hwq->sq)); - hwq->hsq_start = &hwq->sq[0]; - hwq->hsq_end = &hwq->sq[NUM_SQ_ENTRY - 1]; - hwq->hsq_curr = hwq->hsq_start; - - atomic_set(&hwq->hsq_credits, NUM_SQ_ENTRY - 1); - } - - /* Initialize IRQ poll */ - if (afu_is_irqpoll_enabled(afu)) - irq_poll_init(&hwq->irqpoll, afu->irqpoll_weight, - cxlflash_irqpoll); - - } - - rc = init_global(cfg); - - dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc); - return rc; -} - -/** - * init_intr() - setup interrupt handlers for the master context - * @cfg: Internal structure associated with the host. - * @hwq: Hardware queue to initialize. - * - * Return: 0 on success, -errno on failure - */ -static enum undo_level init_intr(struct cxlflash_cfg *cfg, - struct hwq *hwq) -{ - struct device *dev = &cfg->dev->dev; - void *ctx = hwq->ctx_cookie; - int rc = 0; - enum undo_level level = UNDO_NOOP; - bool is_primary_hwq = (hwq->index == PRIMARY_HWQ); - int num_irqs = hwq->num_irqs; - - rc = cfg->ops->allocate_afu_irqs(ctx, num_irqs); - if (unlikely(rc)) { - dev_err(dev, "%s: allocate_afu_irqs failed rc=%d\n", - __func__, rc); - level = UNDO_NOOP; - goto out; - } - - rc = cfg->ops->map_afu_irq(ctx, 1, cxlflash_sync_err_irq, hwq, - "SISL_MSI_SYNC_ERROR"); - if (unlikely(rc <= 0)) { - dev_err(dev, "%s: SISL_MSI_SYNC_ERROR map failed\n", __func__); - level = FREE_IRQ; - goto out; - } - - rc = cfg->ops->map_afu_irq(ctx, 2, cxlflash_rrq_irq, hwq, - "SISL_MSI_RRQ_UPDATED"); - if (unlikely(rc <= 0)) { - dev_err(dev, "%s: SISL_MSI_RRQ_UPDATED map failed\n", __func__); - level = UNMAP_ONE; - goto out; - } - - /* SISL_MSI_ASYNC_ERROR is setup only for the primary HWQ */ - if (!is_primary_hwq) - goto out; - - rc = cfg->ops->map_afu_irq(ctx, 3, cxlflash_async_err_irq, hwq, - "SISL_MSI_ASYNC_ERROR"); - if (unlikely(rc <= 0)) { - dev_err(dev, "%s: SISL_MSI_ASYNC_ERROR map failed\n", __func__); - level = UNMAP_TWO; - goto out; - } -out: - return level; -} - -/** - * init_mc() - create and register as the master context - * @cfg: Internal structure associated with the host. - * @index: HWQ Index of the master context. - * - * Return: 0 on success, -errno on failure - */ -static int init_mc(struct cxlflash_cfg *cfg, u32 index) -{ - void *ctx; - struct device *dev = &cfg->dev->dev; - struct hwq *hwq = get_hwq(cfg->afu, index); - int rc = 0; - int num_irqs; - enum undo_level level; - - hwq->afu = cfg->afu; - hwq->index = index; - INIT_LIST_HEAD(&hwq->pending_cmds); - - if (index == PRIMARY_HWQ) { - ctx = cfg->ops->get_context(cfg->dev, cfg->afu_cookie); - num_irqs = 3; - } else { - ctx = cfg->ops->dev_context_init(cfg->dev, cfg->afu_cookie); - num_irqs = 2; - } - if (IS_ERR_OR_NULL(ctx)) { - rc = -ENOMEM; - goto err1; - } - - WARN_ON(hwq->ctx_cookie); - hwq->ctx_cookie = ctx; - hwq->num_irqs = num_irqs; - - /* Set it up as a master with the CXL */ - cfg->ops->set_master(ctx); - - /* Reset AFU when initializing primary context */ - if (index == PRIMARY_HWQ) { - rc = cfg->ops->afu_reset(ctx); - if (unlikely(rc)) { - dev_err(dev, "%s: AFU reset failed rc=%d\n", - __func__, rc); - goto err1; - } - } - - level = init_intr(cfg, hwq); - if (unlikely(level)) { - dev_err(dev, "%s: interrupt init failed rc=%d\n", __func__, rc); - goto err2; - } - - /* Finally, activate the context by starting it */ - rc = cfg->ops->start_context(hwq->ctx_cookie); - if (unlikely(rc)) { - dev_err(dev, "%s: start context failed rc=%d\n", __func__, rc); - level = UNMAP_THREE; - goto err2; - } - -out: - dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc); - return rc; -err2: - term_intr(cfg, level, index); - if (index != PRIMARY_HWQ) - cfg->ops->release_context(ctx); -err1: - hwq->ctx_cookie = NULL; - goto out; -} - -/** - * get_num_afu_ports() - determines and configures the number of AFU ports - * @cfg: Internal structure associated with the host. - * - * This routine determines the number of AFU ports by converting the global - * port selection mask. The converted value is only valid following an AFU - * reset (explicit or power-on). This routine must be invoked shortly after - * mapping as other routines are dependent on the number of ports during the - * initialization sequence. - * - * To support legacy AFUs that might not have reflected an initial global - * port mask (value read is 0), default to the number of ports originally - * supported by the cxlflash driver (2) before hardware with other port - * offerings was introduced. - */ -static void get_num_afu_ports(struct cxlflash_cfg *cfg) -{ - struct afu *afu = cfg->afu; - struct device *dev = &cfg->dev->dev; - u64 port_mask; - int num_fc_ports = LEGACY_FC_PORTS; - - port_mask = readq_be(&afu->afu_map->global.regs.afu_port_sel); - if (port_mask != 0ULL) - num_fc_ports = min(ilog2(port_mask) + 1, MAX_FC_PORTS); - - dev_dbg(dev, "%s: port_mask=%016llx num_fc_ports=%d\n", - __func__, port_mask, num_fc_ports); - - cfg->num_fc_ports = num_fc_ports; - cfg->host->max_channel = PORTNUM2CHAN(num_fc_ports); -} - -/** - * init_afu() - setup as master context and start AFU - * @cfg: Internal structure associated with the host. - * - * This routine is a higher level of control for configuring the - * AFU on probe and reset paths. - * - * Return: 0 on success, -errno on failure - */ -static int init_afu(struct cxlflash_cfg *cfg) -{ - u64 reg; - int rc = 0; - struct afu *afu = cfg->afu; - struct device *dev = &cfg->dev->dev; - struct hwq *hwq; - int i; - - cfg->ops->perst_reloads_same_image(cfg->afu_cookie, true); - - mutex_init(&afu->sync_active); - afu->num_hwqs = afu->desired_hwqs; - for (i = 0; i < afu->num_hwqs; i++) { - rc = init_mc(cfg, i); - if (rc) { - dev_err(dev, "%s: init_mc failed rc=%d index=%d\n", - __func__, rc, i); - goto err1; - } - } - - /* Map the entire MMIO space of the AFU using the first context */ - hwq = get_hwq(afu, PRIMARY_HWQ); - afu->afu_map = cfg->ops->psa_map(hwq->ctx_cookie); - if (!afu->afu_map) { - dev_err(dev, "%s: psa_map failed\n", __func__); - rc = -ENOMEM; - goto err1; - } - - /* No byte reverse on reading afu_version or string will be backwards */ - reg = readq(&afu->afu_map->global.regs.afu_version); - memcpy(afu->version, ®, sizeof(reg)); - afu->interface_version = - readq_be(&afu->afu_map->global.regs.interface_version); - if ((afu->interface_version + 1) == 0) { - dev_err(dev, "Back level AFU, please upgrade. AFU version %s " - "interface version %016llx\n", afu->version, - afu->interface_version); - rc = -EINVAL; - goto err1; - } - - if (afu_is_sq_cmd_mode(afu)) { - afu->send_cmd = send_cmd_sq; - afu->context_reset = context_reset_sq; - } else { - afu->send_cmd = send_cmd_ioarrin; - afu->context_reset = context_reset_ioarrin; - } - - dev_dbg(dev, "%s: afu_ver=%s interface_ver=%016llx\n", __func__, - afu->version, afu->interface_version); - - get_num_afu_ports(cfg); - - rc = start_afu(cfg); - if (rc) { - dev_err(dev, "%s: start_afu failed, rc=%d\n", __func__, rc); - goto err1; - } - - afu_err_intr_init(cfg->afu); - for (i = 0; i < afu->num_hwqs; i++) { - hwq = get_hwq(afu, i); - - hwq->room = readq_be(&hwq->host_map->cmd_room); - } - - /* Restore the LUN mappings */ - cxlflash_restore_luntable(cfg); -out: - dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc); - return rc; - -err1: - for (i = afu->num_hwqs - 1; i >= 0; i--) { - term_intr(cfg, UNMAP_THREE, i); - term_mc(cfg, i); - } - goto out; -} - -/** - * afu_reset() - resets the AFU - * @cfg: Internal structure associated with the host. - * - * Return: 0 on success, -errno on failure - */ -static int afu_reset(struct cxlflash_cfg *cfg) -{ - struct device *dev = &cfg->dev->dev; - int rc = 0; - - /* Stop the context before the reset. Since the context is - * no longer available restart it after the reset is complete - */ - term_afu(cfg); - - rc = init_afu(cfg); - - dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc); - return rc; -} - -/** - * drain_ioctls() - wait until all currently executing ioctls have completed - * @cfg: Internal structure associated with the host. - * - * Obtain write access to read/write semaphore that wraps ioctl - * handling to 'drain' ioctls currently executing. - */ -static void drain_ioctls(struct cxlflash_cfg *cfg) -{ - down_write(&cfg->ioctl_rwsem); - up_write(&cfg->ioctl_rwsem); -} - -/** - * cxlflash_async_reset_host() - asynchronous host reset handler - * @data: Private data provided while scheduling reset. - * @cookie: Cookie that can be used for checkpointing. - */ -static void cxlflash_async_reset_host(void *data, async_cookie_t cookie) -{ - struct cxlflash_cfg *cfg = data; - struct device *dev = &cfg->dev->dev; - int rc = 0; - - if (cfg->state != STATE_RESET) { - dev_dbg(dev, "%s: Not performing a reset, state=%d\n", - __func__, cfg->state); - goto out; - } - - drain_ioctls(cfg); - cxlflash_mark_contexts_error(cfg); - rc = afu_reset(cfg); - if (rc) - cfg->state = STATE_FAILTERM; - else - cfg->state = STATE_NORMAL; - wake_up_all(&cfg->reset_waitq); - -out: - scsi_unblock_requests(cfg->host); -} - -/** - * cxlflash_schedule_async_reset() - schedule an asynchronous host reset - * @cfg: Internal structure associated with the host. - */ -static void cxlflash_schedule_async_reset(struct cxlflash_cfg *cfg) -{ - struct device *dev = &cfg->dev->dev; - - if (cfg->state != STATE_NORMAL) { - dev_dbg(dev, "%s: Not performing reset state=%d\n", - __func__, cfg->state); - return; - } - - cfg->state = STATE_RESET; - scsi_block_requests(cfg->host); - cfg->async_reset_cookie = async_schedule(cxlflash_async_reset_host, - cfg); -} - -/** - * send_afu_cmd() - builds and sends an internal AFU command - * @afu: AFU associated with the host. - * @rcb: Pre-populated IOARCB describing command to send. - * - * The AFU can only take one internal AFU command at a time. This limitation is - * enforced by using a mutex to provide exclusive access to the AFU during the - * operation. This design point requires calling threads to not be on interrupt - * context due to the possibility of sleeping during concurrent AFU operations. - * - * The command status is optionally passed back to the caller when the caller - * populates the IOASA field of the IOARCB with a pointer to an IOASA structure. - * - * Return: - * 0 on success, -errno on failure - */ -static int send_afu_cmd(struct afu *afu, struct sisl_ioarcb *rcb) -{ - struct cxlflash_cfg *cfg = afu->parent; - struct device *dev = &cfg->dev->dev; - struct afu_cmd *cmd = NULL; - struct hwq *hwq = get_hwq(afu, PRIMARY_HWQ); - ulong lock_flags; - char *buf = NULL; - int rc = 0; - int nretry = 0; - - if (cfg->state != STATE_NORMAL) { - dev_dbg(dev, "%s: Sync not required state=%u\n", - __func__, cfg->state); - return 0; - } - - mutex_lock(&afu->sync_active); - atomic_inc(&afu->cmds_active); - buf = kmalloc(sizeof(*cmd) + __alignof__(*cmd) - 1, GFP_KERNEL); - if (unlikely(!buf)) { - dev_err(dev, "%s: no memory for command\n", __func__); - rc = -ENOMEM; - goto out; - } - - cmd = (struct afu_cmd *)PTR_ALIGN(buf, __alignof__(*cmd)); - -retry: - memset(cmd, 0, sizeof(*cmd)); - memcpy(&cmd->rcb, rcb, sizeof(*rcb)); - INIT_LIST_HEAD(&cmd->queue); - init_completion(&cmd->cevent); - cmd->parent = afu; - cmd->hwq_index = hwq->index; - cmd->rcb.ctx_id = hwq->ctx_hndl; - - dev_dbg(dev, "%s: afu=%p cmd=%p type=%02x nretry=%d\n", - __func__, afu, cmd, cmd->rcb.cdb[0], nretry); - - rc = afu->send_cmd(afu, cmd); - if (unlikely(rc)) { - rc = -ENOBUFS; - goto out; - } - - rc = wait_resp(afu, cmd); - switch (rc) { - case -ETIMEDOUT: - rc = afu->context_reset(hwq); - if (rc) { - /* Delete the command from pending_cmds list */ - spin_lock_irqsave(&hwq->hsq_slock, lock_flags); - list_del(&cmd->list); - spin_unlock_irqrestore(&hwq->hsq_slock, lock_flags); - - cxlflash_schedule_async_reset(cfg); - break; - } - fallthrough; /* to retry */ - case -EAGAIN: - if (++nretry < 2) - goto retry; - fallthrough; /* to exit */ - default: - break; - } - - if (rcb->ioasa) - *rcb->ioasa = cmd->sa; -out: - atomic_dec(&afu->cmds_active); - mutex_unlock(&afu->sync_active); - kfree(buf); - dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc); - return rc; -} - -/** - * cxlflash_afu_sync() - builds and sends an AFU sync command - * @afu: AFU associated with the host. - * @ctx: Identifies context requesting sync. - * @res: Identifies resource requesting sync. - * @mode: Type of sync to issue (lightweight, heavyweight, global). - * - * AFU sync operations are only necessary and allowed when the device is - * operating normally. When not operating normally, sync requests can occur as - * part of cleaning up resources associated with an adapter prior to removal. - * In this scenario, these requests are simply ignored (safe due to the AFU - * going away). - * - * Return: - * 0 on success, -errno on failure - */ -int cxlflash_afu_sync(struct afu *afu, ctx_hndl_t ctx, res_hndl_t res, u8 mode) -{ - struct cxlflash_cfg *cfg = afu->parent; - struct device *dev = &cfg->dev->dev; - struct sisl_ioarcb rcb = { 0 }; - - dev_dbg(dev, "%s: afu=%p ctx=%u res=%u mode=%u\n", - __func__, afu, ctx, res, mode); - - rcb.req_flags = SISL_REQ_FLAGS_AFU_CMD; - rcb.msi = SISL_MSI_RRQ_UPDATED; - rcb.timeout = MC_AFU_SYNC_TIMEOUT; - - rcb.cdb[0] = SISL_AFU_CMD_SYNC; - rcb.cdb[1] = mode; - put_unaligned_be16(ctx, &rcb.cdb[2]); - put_unaligned_be32(res, &rcb.cdb[4]); - - return send_afu_cmd(afu, &rcb); -} - -/** - * cxlflash_eh_abort_handler() - abort a SCSI command - * @scp: SCSI command to abort. - * - * CXL Flash devices do not support a single command abort. Reset the context - * as per SISLite specification. Flush any pending commands in the hardware - * queue before the reset. - * - * Return: SUCCESS/FAILED as defined in scsi/scsi.h - */ -static int cxlflash_eh_abort_handler(struct scsi_cmnd *scp) -{ - int rc = FAILED; - struct Scsi_Host *host = scp->device->host; - struct cxlflash_cfg *cfg = shost_priv(host); - struct afu_cmd *cmd = sc_to_afuc(scp); - struct device *dev = &cfg->dev->dev; - struct afu *afu = cfg->afu; - struct hwq *hwq = get_hwq(afu, cmd->hwq_index); - - dev_dbg(dev, "%s: (scp=%p) %d/%d/%d/%llu " - "cdb=(%08x-%08x-%08x-%08x)\n", __func__, scp, host->host_no, - scp->device->channel, scp->device->id, scp->device->lun, - get_unaligned_be32(&((u32 *)scp->cmnd)[0]), - get_unaligned_be32(&((u32 *)scp->cmnd)[1]), - get_unaligned_be32(&((u32 *)scp->cmnd)[2]), - get_unaligned_be32(&((u32 *)scp->cmnd)[3])); - - /* When the state is not normal, another reset/reload is in progress. - * Return failed and the mid-layer will invoke host reset handler. - */ - if (cfg->state != STATE_NORMAL) { - dev_dbg(dev, "%s: Invalid state for abort, state=%d\n", - __func__, cfg->state); - goto out; - } - - rc = afu->context_reset(hwq); - if (unlikely(rc)) - goto out; - - rc = SUCCESS; - -out: - dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc); - return rc; -} - -/** - * cxlflash_eh_device_reset_handler() - reset a single LUN - * @scp: SCSI command to send. - * - * Return: - * SUCCESS as defined in scsi/scsi.h - * FAILED as defined in scsi/scsi.h - */ -static int cxlflash_eh_device_reset_handler(struct scsi_cmnd *scp) -{ - int rc = SUCCESS; - struct scsi_device *sdev = scp->device; - struct Scsi_Host *host = sdev->host; - struct cxlflash_cfg *cfg = shost_priv(host); - struct device *dev = &cfg->dev->dev; - int rcr = 0; - - dev_dbg(dev, "%s: %d/%d/%d/%llu\n", __func__, - host->host_no, sdev->channel, sdev->id, sdev->lun); -retry: - switch (cfg->state) { - case STATE_NORMAL: - rcr = send_tmf(cfg, sdev, TMF_LUN_RESET); - if (unlikely(rcr)) - rc = FAILED; - break; - case STATE_RESET: - wait_event(cfg->reset_waitq, cfg->state != STATE_RESET); - goto retry; - default: - rc = FAILED; - break; - } - - dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc); - return rc; -} - -/** - * cxlflash_eh_host_reset_handler() - reset the host adapter - * @scp: SCSI command from stack identifying host. - * - * Following a reset, the state is evaluated again in case an EEH occurred - * during the reset. In such a scenario, the host reset will either yield - * until the EEH recovery is complete or return success or failure based - * upon the current device state. - * - * Return: - * SUCCESS as defined in scsi/scsi.h - * FAILED as defined in scsi/scsi.h - */ -static int cxlflash_eh_host_reset_handler(struct scsi_cmnd *scp) -{ - int rc = SUCCESS; - int rcr = 0; - struct Scsi_Host *host = scp->device->host; - struct cxlflash_cfg *cfg = shost_priv(host); - struct device *dev = &cfg->dev->dev; - - dev_dbg(dev, "%s: %d\n", __func__, host->host_no); - - switch (cfg->state) { - case STATE_NORMAL: - cfg->state = STATE_RESET; - drain_ioctls(cfg); - cxlflash_mark_contexts_error(cfg); - rcr = afu_reset(cfg); - if (rcr) { - rc = FAILED; - cfg->state = STATE_FAILTERM; - } else - cfg->state = STATE_NORMAL; - wake_up_all(&cfg->reset_waitq); - ssleep(1); - fallthrough; - case STATE_RESET: - wait_event(cfg->reset_waitq, cfg->state != STATE_RESET); - if (cfg->state == STATE_NORMAL) - break; - fallthrough; - default: - rc = FAILED; - break; - } - - dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc); - return rc; -} - -/** - * cxlflash_change_queue_depth() - change the queue depth for the device - * @sdev: SCSI device destined for queue depth change. - * @qdepth: Requested queue depth value to set. - * - * The requested queue depth is capped to the maximum supported value. - * - * Return: The actual queue depth set. - */ -static int cxlflash_change_queue_depth(struct scsi_device *sdev, int qdepth) -{ - - if (qdepth > CXLFLASH_MAX_CMDS_PER_LUN) - qdepth = CXLFLASH_MAX_CMDS_PER_LUN; - - scsi_change_queue_depth(sdev, qdepth); - return sdev->queue_depth; -} - -/** - * cxlflash_show_port_status() - queries and presents the current port status - * @port: Desired port for status reporting. - * @cfg: Internal structure associated with the host. - * @buf: Buffer of length PAGE_SIZE to report back port status in ASCII. - * - * Return: The size of the ASCII string returned in @buf or -EINVAL. - */ -static ssize_t cxlflash_show_port_status(u32 port, - struct cxlflash_cfg *cfg, - char *buf) -{ - struct device *dev = &cfg->dev->dev; - char *disp_status; - u64 status; - __be64 __iomem *fc_port_regs; - - WARN_ON(port >= MAX_FC_PORTS); - - if (port >= cfg->num_fc_ports) { - dev_info(dev, "%s: Port %d not supported on this card.\n", - __func__, port); - return -EINVAL; - } - - fc_port_regs = get_fc_port_regs(cfg, port); - status = readq_be(&fc_port_regs[FC_MTIP_STATUS / 8]); - status &= FC_MTIP_STATUS_MASK; - - if (status == FC_MTIP_STATUS_ONLINE) - disp_status = "online"; - else if (status == FC_MTIP_STATUS_OFFLINE) - disp_status = "offline"; - else - disp_status = "unknown"; - - return scnprintf(buf, PAGE_SIZE, "%s\n", disp_status); -} - -/** - * port0_show() - queries and presents the current status of port 0 - * @dev: Generic device associated with the host owning the port. - * @attr: Device attribute representing the port. - * @buf: Buffer of length PAGE_SIZE to report back port status in ASCII. - * - * Return: The size of the ASCII string returned in @buf. - */ -static ssize_t port0_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct cxlflash_cfg *cfg = shost_priv(class_to_shost(dev)); - - return cxlflash_show_port_status(0, cfg, buf); -} - -/** - * port1_show() - queries and presents the current status of port 1 - * @dev: Generic device associated with the host owning the port. - * @attr: Device attribute representing the port. - * @buf: Buffer of length PAGE_SIZE to report back port status in ASCII. - * - * Return: The size of the ASCII string returned in @buf. - */ -static ssize_t port1_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct cxlflash_cfg *cfg = shost_priv(class_to_shost(dev)); - - return cxlflash_show_port_status(1, cfg, buf); -} - -/** - * port2_show() - queries and presents the current status of port 2 - * @dev: Generic device associated with the host owning the port. - * @attr: Device attribute representing the port. - * @buf: Buffer of length PAGE_SIZE to report back port status in ASCII. - * - * Return: The size of the ASCII string returned in @buf. - */ -static ssize_t port2_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct cxlflash_cfg *cfg = shost_priv(class_to_shost(dev)); - - return cxlflash_show_port_status(2, cfg, buf); -} - -/** - * port3_show() - queries and presents the current status of port 3 - * @dev: Generic device associated with the host owning the port. - * @attr: Device attribute representing the port. - * @buf: Buffer of length PAGE_SIZE to report back port status in ASCII. - * - * Return: The size of the ASCII string returned in @buf. - */ -static ssize_t port3_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct cxlflash_cfg *cfg = shost_priv(class_to_shost(dev)); - - return cxlflash_show_port_status(3, cfg, buf); -} - -/** - * lun_mode_show() - presents the current LUN mode of the host - * @dev: Generic device associated with the host. - * @attr: Device attribute representing the LUN mode. - * @buf: Buffer of length PAGE_SIZE to report back the LUN mode in ASCII. - * - * Return: The size of the ASCII string returned in @buf. - */ -static ssize_t lun_mode_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct cxlflash_cfg *cfg = shost_priv(class_to_shost(dev)); - struct afu *afu = cfg->afu; - - return scnprintf(buf, PAGE_SIZE, "%u\n", afu->internal_lun); -} - -/** - * lun_mode_store() - sets the LUN mode of the host - * @dev: Generic device associated with the host. - * @attr: Device attribute representing the LUN mode. - * @buf: Buffer of length PAGE_SIZE containing the LUN mode in ASCII. - * @count: Length of data resizing in @buf. - * - * The CXL Flash AFU supports a dummy LUN mode where the external - * links and storage are not required. Space on the FPGA is used - * to create 1 or 2 small LUNs which are presented to the system - * as if they were a normal storage device. This feature is useful - * during development and also provides manufacturing with a way - * to test the AFU without an actual device. - * - * 0 = external LUN[s] (default) - * 1 = internal LUN (1 x 64K, 512B blocks, id 0) - * 2 = internal LUN (1 x 64K, 4K blocks, id 0) - * 3 = internal LUN (2 x 32K, 512B blocks, ids 0,1) - * 4 = internal LUN (2 x 32K, 4K blocks, ids 0,1) - * - * Return: The size of the ASCII string returned in @buf. - */ -static ssize_t lun_mode_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct Scsi_Host *shost = class_to_shost(dev); - struct cxlflash_cfg *cfg = shost_priv(shost); - struct afu *afu = cfg->afu; - int rc; - u32 lun_mode; - - rc = kstrtouint(buf, 10, &lun_mode); - if (!rc && (lun_mode < 5) && (lun_mode != afu->internal_lun)) { - afu->internal_lun = lun_mode; - - /* - * When configured for internal LUN, there is only one channel, - * channel number 0, else there will be one less than the number - * of fc ports for this card. - */ - if (afu->internal_lun) - shost->max_channel = 0; - else - shost->max_channel = PORTNUM2CHAN(cfg->num_fc_ports); - - afu_reset(cfg); - scsi_scan_host(cfg->host); - } - - return count; -} - -/** - * ioctl_version_show() - presents the current ioctl version of the host - * @dev: Generic device associated with the host. - * @attr: Device attribute representing the ioctl version. - * @buf: Buffer of length PAGE_SIZE to report back the ioctl version. - * - * Return: The size of the ASCII string returned in @buf. - */ -static ssize_t ioctl_version_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - ssize_t bytes = 0; - - bytes = scnprintf(buf, PAGE_SIZE, - "disk: %u\n", DK_CXLFLASH_VERSION_0); - bytes += scnprintf(buf + bytes, PAGE_SIZE - bytes, - "host: %u\n", HT_CXLFLASH_VERSION_0); - - return bytes; -} - -/** - * cxlflash_show_port_lun_table() - queries and presents the port LUN table - * @port: Desired port for status reporting. - * @cfg: Internal structure associated with the host. - * @buf: Buffer of length PAGE_SIZE to report back port status in ASCII. - * - * Return: The size of the ASCII string returned in @buf or -EINVAL. - */ -static ssize_t cxlflash_show_port_lun_table(u32 port, - struct cxlflash_cfg *cfg, - char *buf) -{ - struct device *dev = &cfg->dev->dev; - __be64 __iomem *fc_port_luns; - int i; - ssize_t bytes = 0; - - WARN_ON(port >= MAX_FC_PORTS); - - if (port >= cfg->num_fc_ports) { - dev_info(dev, "%s: Port %d not supported on this card.\n", - __func__, port); - return -EINVAL; - } - - fc_port_luns = get_fc_port_luns(cfg, port); - - for (i = 0; i < CXLFLASH_NUM_VLUNS; i++) - bytes += scnprintf(buf + bytes, PAGE_SIZE - bytes, - "%03d: %016llx\n", - i, readq_be(&fc_port_luns[i])); - return bytes; -} - -/** - * port0_lun_table_show() - presents the current LUN table of port 0 - * @dev: Generic device associated with the host owning the port. - * @attr: Device attribute representing the port. - * @buf: Buffer of length PAGE_SIZE to report back port status in ASCII. - * - * Return: The size of the ASCII string returned in @buf. - */ -static ssize_t port0_lun_table_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct cxlflash_cfg *cfg = shost_priv(class_to_shost(dev)); - - return cxlflash_show_port_lun_table(0, cfg, buf); -} - -/** - * port1_lun_table_show() - presents the current LUN table of port 1 - * @dev: Generic device associated with the host owning the port. - * @attr: Device attribute representing the port. - * @buf: Buffer of length PAGE_SIZE to report back port status in ASCII. - * - * Return: The size of the ASCII string returned in @buf. - */ -static ssize_t port1_lun_table_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct cxlflash_cfg *cfg = shost_priv(class_to_shost(dev)); - - return cxlflash_show_port_lun_table(1, cfg, buf); -} - -/** - * port2_lun_table_show() - presents the current LUN table of port 2 - * @dev: Generic device associated with the host owning the port. - * @attr: Device attribute representing the port. - * @buf: Buffer of length PAGE_SIZE to report back port status in ASCII. - * - * Return: The size of the ASCII string returned in @buf. - */ -static ssize_t port2_lun_table_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct cxlflash_cfg *cfg = shost_priv(class_to_shost(dev)); - - return cxlflash_show_port_lun_table(2, cfg, buf); -} - -/** - * port3_lun_table_show() - presents the current LUN table of port 3 - * @dev: Generic device associated with the host owning the port. - * @attr: Device attribute representing the port. - * @buf: Buffer of length PAGE_SIZE to report back port status in ASCII. - * - * Return: The size of the ASCII string returned in @buf. - */ -static ssize_t port3_lun_table_show(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct cxlflash_cfg *cfg = shost_priv(class_to_shost(dev)); - - return cxlflash_show_port_lun_table(3, cfg, buf); -} - -/** - * irqpoll_weight_show() - presents the current IRQ poll weight for the host - * @dev: Generic device associated with the host. - * @attr: Device attribute representing the IRQ poll weight. - * @buf: Buffer of length PAGE_SIZE to report back the current IRQ poll - * weight in ASCII. - * - * An IRQ poll weight of 0 indicates polling is disabled. - * - * Return: The size of the ASCII string returned in @buf. - */ -static ssize_t irqpoll_weight_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct cxlflash_cfg *cfg = shost_priv(class_to_shost(dev)); - struct afu *afu = cfg->afu; - - return scnprintf(buf, PAGE_SIZE, "%u\n", afu->irqpoll_weight); -} - -/** - * irqpoll_weight_store() - sets the current IRQ poll weight for the host - * @dev: Generic device associated with the host. - * @attr: Device attribute representing the IRQ poll weight. - * @buf: Buffer of length PAGE_SIZE containing the desired IRQ poll - * weight in ASCII. - * @count: Length of data resizing in @buf. - * - * An IRQ poll weight of 0 indicates polling is disabled. - * - * Return: The size of the ASCII string returned in @buf. - */ -static ssize_t irqpoll_weight_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct cxlflash_cfg *cfg = shost_priv(class_to_shost(dev)); - struct device *cfgdev = &cfg->dev->dev; - struct afu *afu = cfg->afu; - struct hwq *hwq; - u32 weight; - int rc, i; - - rc = kstrtouint(buf, 10, &weight); - if (rc) - return -EINVAL; - - if (weight > 256) { - dev_info(cfgdev, - "Invalid IRQ poll weight. It must be 256 or less.\n"); - return -EINVAL; - } - - if (weight == afu->irqpoll_weight) { - dev_info(cfgdev, - "Current IRQ poll weight has the same weight.\n"); - return -EINVAL; - } - - if (afu_is_irqpoll_enabled(afu)) { - for (i = 0; i < afu->num_hwqs; i++) { - hwq = get_hwq(afu, i); - - irq_poll_disable(&hwq->irqpoll); - } - } - - afu->irqpoll_weight = weight; - - if (weight > 0) { - for (i = 0; i < afu->num_hwqs; i++) { - hwq = get_hwq(afu, i); - - irq_poll_init(&hwq->irqpoll, weight, cxlflash_irqpoll); - } - } - - return count; -} - -/** - * num_hwqs_show() - presents the number of hardware queues for the host - * @dev: Generic device associated with the host. - * @attr: Device attribute representing the number of hardware queues. - * @buf: Buffer of length PAGE_SIZE to report back the number of hardware - * queues in ASCII. - * - * Return: The size of the ASCII string returned in @buf. - */ -static ssize_t num_hwqs_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct cxlflash_cfg *cfg = shost_priv(class_to_shost(dev)); - struct afu *afu = cfg->afu; - - return scnprintf(buf, PAGE_SIZE, "%u\n", afu->num_hwqs); -} - -/** - * num_hwqs_store() - sets the number of hardware queues for the host - * @dev: Generic device associated with the host. - * @attr: Device attribute representing the number of hardware queues. - * @buf: Buffer of length PAGE_SIZE containing the number of hardware - * queues in ASCII. - * @count: Length of data resizing in @buf. - * - * n > 0: num_hwqs = n - * n = 0: num_hwqs = num_online_cpus() - * n < 0: num_online_cpus() / abs(n) - * - * Return: The size of the ASCII string returned in @buf. - */ -static ssize_t num_hwqs_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct cxlflash_cfg *cfg = shost_priv(class_to_shost(dev)); - struct afu *afu = cfg->afu; - int rc; - int nhwqs, num_hwqs; - - rc = kstrtoint(buf, 10, &nhwqs); - if (rc) - return -EINVAL; - - if (nhwqs >= 1) - num_hwqs = nhwqs; - else if (nhwqs == 0) - num_hwqs = num_online_cpus(); - else - num_hwqs = num_online_cpus() / abs(nhwqs); - - afu->desired_hwqs = min(num_hwqs, CXLFLASH_MAX_HWQS); - WARN_ON_ONCE(afu->desired_hwqs == 0); - -retry: - switch (cfg->state) { - case STATE_NORMAL: - cfg->state = STATE_RESET; - drain_ioctls(cfg); - cxlflash_mark_contexts_error(cfg); - rc = afu_reset(cfg); - if (rc) - cfg->state = STATE_FAILTERM; - else - cfg->state = STATE_NORMAL; - wake_up_all(&cfg->reset_waitq); - break; - case STATE_RESET: - wait_event(cfg->reset_waitq, cfg->state != STATE_RESET); - if (cfg->state == STATE_NORMAL) - goto retry; - fallthrough; - default: - /* Ideally should not happen */ - dev_err(dev, "%s: Device is not ready, state=%d\n", - __func__, cfg->state); - break; - } - - return count; -} - -static const char *hwq_mode_name[MAX_HWQ_MODE] = { "rr", "tag", "cpu" }; - -/** - * hwq_mode_show() - presents the HWQ steering mode for the host - * @dev: Generic device associated with the host. - * @attr: Device attribute representing the HWQ steering mode. - * @buf: Buffer of length PAGE_SIZE to report back the HWQ steering mode - * as a character string. - * - * Return: The size of the ASCII string returned in @buf. - */ -static ssize_t hwq_mode_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct cxlflash_cfg *cfg = shost_priv(class_to_shost(dev)); - struct afu *afu = cfg->afu; - - return scnprintf(buf, PAGE_SIZE, "%s\n", hwq_mode_name[afu->hwq_mode]); -} - -/** - * hwq_mode_store() - sets the HWQ steering mode for the host - * @dev: Generic device associated with the host. - * @attr: Device attribute representing the HWQ steering mode. - * @buf: Buffer of length PAGE_SIZE containing the HWQ steering mode - * as a character string. - * @count: Length of data resizing in @buf. - * - * rr = Round-Robin - * tag = Block MQ Tagging - * cpu = CPU Affinity - * - * Return: The size of the ASCII string returned in @buf. - */ -static ssize_t hwq_mode_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct Scsi_Host *shost = class_to_shost(dev); - struct cxlflash_cfg *cfg = shost_priv(shost); - struct device *cfgdev = &cfg->dev->dev; - struct afu *afu = cfg->afu; - int i; - u32 mode = MAX_HWQ_MODE; - - for (i = 0; i < MAX_HWQ_MODE; i++) { - if (!strncmp(hwq_mode_name[i], buf, strlen(hwq_mode_name[i]))) { - mode = i; - break; - } - } - - if (mode >= MAX_HWQ_MODE) { - dev_info(cfgdev, "Invalid HWQ steering mode.\n"); - return -EINVAL; - } - - afu->hwq_mode = mode; - - return count; -} - -/** - * mode_show() - presents the current mode of the device - * @dev: Generic device associated with the device. - * @attr: Device attribute representing the device mode. - * @buf: Buffer of length PAGE_SIZE to report back the dev mode in ASCII. - * - * Return: The size of the ASCII string returned in @buf. - */ -static ssize_t mode_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct scsi_device *sdev = to_scsi_device(dev); - - return scnprintf(buf, PAGE_SIZE, "%s\n", - sdev->hostdata ? "superpipe" : "legacy"); -} - -/* - * Host attributes - */ -static DEVICE_ATTR_RO(port0); -static DEVICE_ATTR_RO(port1); -static DEVICE_ATTR_RO(port2); -static DEVICE_ATTR_RO(port3); -static DEVICE_ATTR_RW(lun_mode); -static DEVICE_ATTR_RO(ioctl_version); -static DEVICE_ATTR_RO(port0_lun_table); -static DEVICE_ATTR_RO(port1_lun_table); -static DEVICE_ATTR_RO(port2_lun_table); -static DEVICE_ATTR_RO(port3_lun_table); -static DEVICE_ATTR_RW(irqpoll_weight); -static DEVICE_ATTR_RW(num_hwqs); -static DEVICE_ATTR_RW(hwq_mode); - -static struct attribute *cxlflash_host_attrs[] = { - &dev_attr_port0.attr, - &dev_attr_port1.attr, - &dev_attr_port2.attr, - &dev_attr_port3.attr, - &dev_attr_lun_mode.attr, - &dev_attr_ioctl_version.attr, - &dev_attr_port0_lun_table.attr, - &dev_attr_port1_lun_table.attr, - &dev_attr_port2_lun_table.attr, - &dev_attr_port3_lun_table.attr, - &dev_attr_irqpoll_weight.attr, - &dev_attr_num_hwqs.attr, - &dev_attr_hwq_mode.attr, - NULL -}; - -ATTRIBUTE_GROUPS(cxlflash_host); - -/* - * Device attributes - */ -static DEVICE_ATTR_RO(mode); - -static struct attribute *cxlflash_dev_attrs[] = { - &dev_attr_mode.attr, - NULL -}; - -ATTRIBUTE_GROUPS(cxlflash_dev); - -/* - * Host template - */ -static struct scsi_host_template driver_template = { - .module = THIS_MODULE, - .name = CXLFLASH_ADAPTER_NAME, - .info = cxlflash_driver_info, - .ioctl = cxlflash_ioctl, - .proc_name = CXLFLASH_NAME, - .queuecommand = cxlflash_queuecommand, - .eh_abort_handler = cxlflash_eh_abort_handler, - .eh_device_reset_handler = cxlflash_eh_device_reset_handler, - .eh_host_reset_handler = cxlflash_eh_host_reset_handler, - .change_queue_depth = cxlflash_change_queue_depth, - .cmd_per_lun = CXLFLASH_MAX_CMDS_PER_LUN, - .can_queue = CXLFLASH_MAX_CMDS, - .cmd_size = sizeof(struct afu_cmd) + __alignof__(struct afu_cmd) - 1, - .this_id = -1, - .sg_tablesize = 1, /* No scatter gather support */ - .max_sectors = CXLFLASH_MAX_SECTORS, - .shost_groups = cxlflash_host_groups, - .sdev_groups = cxlflash_dev_groups, -}; - -/* - * Device dependent values - */ -static struct dev_dependent_vals dev_corsa_vals = { CXLFLASH_MAX_SECTORS, - CXLFLASH_WWPN_VPD_REQUIRED }; -static struct dev_dependent_vals dev_flash_gt_vals = { CXLFLASH_MAX_SECTORS, - CXLFLASH_NOTIFY_SHUTDOWN }; -static struct dev_dependent_vals dev_briard_vals = { CXLFLASH_MAX_SECTORS, - (CXLFLASH_NOTIFY_SHUTDOWN | - CXLFLASH_OCXL_DEV) }; - -/* - * PCI device binding table - */ -static const struct pci_device_id cxlflash_pci_table[] = { - {PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CORSA, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, (kernel_ulong_t)&dev_corsa_vals}, - {PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_FLASH_GT, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, (kernel_ulong_t)&dev_flash_gt_vals}, - {PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_BRIARD, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, (kernel_ulong_t)&dev_briard_vals}, - {} -}; - -MODULE_DEVICE_TABLE(pci, cxlflash_pci_table); - -/** - * cxlflash_worker_thread() - work thread handler for the AFU - * @work: Work structure contained within cxlflash associated with host. - * - * Handles the following events: - * - Link reset which cannot be performed on interrupt context due to - * blocking up to a few seconds - * - Rescan the host - */ -static void cxlflash_worker_thread(struct work_struct *work) -{ - struct cxlflash_cfg *cfg = container_of(work, struct cxlflash_cfg, - work_q); - struct afu *afu = cfg->afu; - struct device *dev = &cfg->dev->dev; - __be64 __iomem *fc_port_regs; - int port; - ulong lock_flags; - - /* Avoid MMIO if the device has failed */ - - if (cfg->state != STATE_NORMAL) - return; - - spin_lock_irqsave(cfg->host->host_lock, lock_flags); - - if (cfg->lr_state == LINK_RESET_REQUIRED) { - port = cfg->lr_port; - if (port < 0) - dev_err(dev, "%s: invalid port index %d\n", - __func__, port); - else { - spin_unlock_irqrestore(cfg->host->host_lock, - lock_flags); - - /* The reset can block... */ - fc_port_regs = get_fc_port_regs(cfg, port); - afu_link_reset(afu, port, fc_port_regs); - spin_lock_irqsave(cfg->host->host_lock, lock_flags); - } - - cfg->lr_state = LINK_RESET_COMPLETE; - } - - spin_unlock_irqrestore(cfg->host->host_lock, lock_flags); - - if (atomic_dec_if_positive(&cfg->scan_host_needed) >= 0) - scsi_scan_host(cfg->host); -} - -/** - * cxlflash_chr_open() - character device open handler - * @inode: Device inode associated with this character device. - * @file: File pointer for this device. - * - * Only users with admin privileges are allowed to open the character device. - * - * Return: 0 on success, -errno on failure - */ -static int cxlflash_chr_open(struct inode *inode, struct file *file) -{ - struct cxlflash_cfg *cfg; - - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - - cfg = container_of(inode->i_cdev, struct cxlflash_cfg, cdev); - file->private_data = cfg; - - return 0; -} - -/** - * decode_hioctl() - translates encoded host ioctl to easily identifiable string - * @cmd: The host ioctl command to decode. - * - * Return: A string identifying the decoded host ioctl. - */ -static char *decode_hioctl(unsigned int cmd) -{ - switch (cmd) { - case HT_CXLFLASH_LUN_PROVISION: - return __stringify_1(HT_CXLFLASH_LUN_PROVISION); - } - - return "UNKNOWN"; -} - -/** - * cxlflash_lun_provision() - host LUN provisioning handler - * @cfg: Internal structure associated with the host. - * @arg: Kernel copy of userspace ioctl data structure. - * - * Return: 0 on success, -errno on failure - */ -static int cxlflash_lun_provision(struct cxlflash_cfg *cfg, void *arg) -{ - struct ht_cxlflash_lun_provision *lunprov = arg; - struct afu *afu = cfg->afu; - struct device *dev = &cfg->dev->dev; - struct sisl_ioarcb rcb; - struct sisl_ioasa asa; - __be64 __iomem *fc_port_regs; - u16 port = lunprov->port; - u16 scmd = lunprov->hdr.subcmd; - u16 type; - u64 reg; - u64 size; - u64 lun_id; - int rc = 0; - - if (!afu_is_lun_provision(afu)) { - rc = -ENOTSUPP; - goto out; - } - - if (port >= cfg->num_fc_ports) { - rc = -EINVAL; - goto out; - } - - switch (scmd) { - case HT_CXLFLASH_LUN_PROVISION_SUBCMD_CREATE_LUN: - type = SISL_AFU_LUN_PROVISION_CREATE; - size = lunprov->size; - lun_id = 0; - break; - case HT_CXLFLASH_LUN_PROVISION_SUBCMD_DELETE_LUN: - type = SISL_AFU_LUN_PROVISION_DELETE; - size = 0; - lun_id = lunprov->lun_id; - break; - case HT_CXLFLASH_LUN_PROVISION_SUBCMD_QUERY_PORT: - fc_port_regs = get_fc_port_regs(cfg, port); - - reg = readq_be(&fc_port_regs[FC_MAX_NUM_LUNS / 8]); - lunprov->max_num_luns = reg; - reg = readq_be(&fc_port_regs[FC_CUR_NUM_LUNS / 8]); - lunprov->cur_num_luns = reg; - reg = readq_be(&fc_port_regs[FC_MAX_CAP_PORT / 8]); - lunprov->max_cap_port = reg; - reg = readq_be(&fc_port_regs[FC_CUR_CAP_PORT / 8]); - lunprov->cur_cap_port = reg; - - goto out; - default: - rc = -EINVAL; - goto out; - } - - memset(&rcb, 0, sizeof(rcb)); - memset(&asa, 0, sizeof(asa)); - rcb.req_flags = SISL_REQ_FLAGS_AFU_CMD; - rcb.lun_id = lun_id; - rcb.msi = SISL_MSI_RRQ_UPDATED; - rcb.timeout = MC_LUN_PROV_TIMEOUT; - rcb.ioasa = &asa; - - rcb.cdb[0] = SISL_AFU_CMD_LUN_PROVISION; - rcb.cdb[1] = type; - rcb.cdb[2] = port; - put_unaligned_be64(size, &rcb.cdb[8]); - - rc = send_afu_cmd(afu, &rcb); - if (rc) { - dev_err(dev, "%s: send_afu_cmd failed rc=%d asc=%08x afux=%x\n", - __func__, rc, asa.ioasc, asa.afu_extra); - goto out; - } - - if (scmd == HT_CXLFLASH_LUN_PROVISION_SUBCMD_CREATE_LUN) { - lunprov->lun_id = (u64)asa.lunid_hi << 32 | asa.lunid_lo; - memcpy(lunprov->wwid, asa.wwid, sizeof(lunprov->wwid)); - } -out: - dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc); - return rc; -} - -/** - * cxlflash_afu_debug() - host AFU debug handler - * @cfg: Internal structure associated with the host. - * @arg: Kernel copy of userspace ioctl data structure. - * - * For debug requests requiring a data buffer, always provide an aligned - * (cache line) buffer to the AFU to appease any alignment requirements. - * - * Return: 0 on success, -errno on failure - */ -static int cxlflash_afu_debug(struct cxlflash_cfg *cfg, void *arg) -{ - struct ht_cxlflash_afu_debug *afu_dbg = arg; - struct afu *afu = cfg->afu; - struct device *dev = &cfg->dev->dev; - struct sisl_ioarcb rcb; - struct sisl_ioasa asa; - char *buf = NULL; - char *kbuf = NULL; - void __user *ubuf = (__force void __user *)afu_dbg->data_ea; - u16 req_flags = SISL_REQ_FLAGS_AFU_CMD; - u32 ulen = afu_dbg->data_len; - bool is_write = afu_dbg->hdr.flags & HT_CXLFLASH_HOST_WRITE; - int rc = 0; - - if (!afu_is_afu_debug(afu)) { - rc = -ENOTSUPP; - goto out; - } - - if (ulen) { - req_flags |= SISL_REQ_FLAGS_SUP_UNDERRUN; - - if (ulen > HT_CXLFLASH_AFU_DEBUG_MAX_DATA_LEN) { - rc = -EINVAL; - goto out; - } - - buf = kmalloc(ulen + cache_line_size() - 1, GFP_KERNEL); - if (unlikely(!buf)) { - rc = -ENOMEM; - goto out; - } - - kbuf = PTR_ALIGN(buf, cache_line_size()); - - if (is_write) { - req_flags |= SISL_REQ_FLAGS_HOST_WRITE; - - if (copy_from_user(kbuf, ubuf, ulen)) { - rc = -EFAULT; - goto out; - } - } - } - - memset(&rcb, 0, sizeof(rcb)); - memset(&asa, 0, sizeof(asa)); - - rcb.req_flags = req_flags; - rcb.msi = SISL_MSI_RRQ_UPDATED; - rcb.timeout = MC_AFU_DEBUG_TIMEOUT; - rcb.ioasa = &asa; - - if (ulen) { - rcb.data_len = ulen; - rcb.data_ea = (uintptr_t)kbuf; - } - - rcb.cdb[0] = SISL_AFU_CMD_DEBUG; - memcpy(&rcb.cdb[4], afu_dbg->afu_subcmd, - HT_CXLFLASH_AFU_DEBUG_SUBCMD_LEN); - - rc = send_afu_cmd(afu, &rcb); - if (rc) { - dev_err(dev, "%s: send_afu_cmd failed rc=%d asc=%08x afux=%x\n", - __func__, rc, asa.ioasc, asa.afu_extra); - goto out; - } - - if (ulen && !is_write) { - if (copy_to_user(ubuf, kbuf, ulen)) - rc = -EFAULT; - } -out: - kfree(buf); - dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc); - return rc; -} - -/** - * cxlflash_chr_ioctl() - character device IOCTL handler - * @file: File pointer for this device. - * @cmd: IOCTL command. - * @arg: Userspace ioctl data structure. - * - * A read/write semaphore is used to implement a 'drain' of currently - * running ioctls. The read semaphore is taken at the beginning of each - * ioctl thread and released upon concluding execution. Additionally the - * semaphore should be released and then reacquired in any ioctl execution - * path which will wait for an event to occur that is outside the scope of - * the ioctl (i.e. an adapter reset). To drain the ioctls currently running, - * a thread simply needs to acquire the write semaphore. - * - * Return: 0 on success, -errno on failure - */ -static long cxlflash_chr_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - typedef int (*hioctl) (struct cxlflash_cfg *, void *); - - struct cxlflash_cfg *cfg = file->private_data; - struct device *dev = &cfg->dev->dev; - char buf[sizeof(union cxlflash_ht_ioctls)]; - void __user *uarg = (void __user *)arg; - struct ht_cxlflash_hdr *hdr; - size_t size = 0; - bool known_ioctl = false; - int idx = 0; - int rc = 0; - hioctl do_ioctl = NULL; - - static const struct { - size_t size; - hioctl ioctl; - } ioctl_tbl[] = { /* NOTE: order matters here */ - { sizeof(struct ht_cxlflash_lun_provision), cxlflash_lun_provision }, - { sizeof(struct ht_cxlflash_afu_debug), cxlflash_afu_debug }, - }; - - /* Hold read semaphore so we can drain if needed */ - down_read(&cfg->ioctl_rwsem); - - dev_dbg(dev, "%s: cmd=%u idx=%d tbl_size=%lu\n", - __func__, cmd, idx, sizeof(ioctl_tbl)); - - switch (cmd) { - case HT_CXLFLASH_LUN_PROVISION: - case HT_CXLFLASH_AFU_DEBUG: - known_ioctl = true; - idx = _IOC_NR(HT_CXLFLASH_LUN_PROVISION) - _IOC_NR(cmd); - size = ioctl_tbl[idx].size; - do_ioctl = ioctl_tbl[idx].ioctl; - - if (likely(do_ioctl)) - break; - - fallthrough; - default: - rc = -EINVAL; - goto out; - } - - if (unlikely(copy_from_user(&buf, uarg, size))) { - dev_err(dev, "%s: copy_from_user() fail " - "size=%lu cmd=%d (%s) uarg=%p\n", - __func__, size, cmd, decode_hioctl(cmd), uarg); - rc = -EFAULT; - goto out; - } - - hdr = (struct ht_cxlflash_hdr *)&buf; - if (hdr->version != HT_CXLFLASH_VERSION_0) { - dev_dbg(dev, "%s: Version %u not supported for %s\n", - __func__, hdr->version, decode_hioctl(cmd)); - rc = -EINVAL; - goto out; - } - - if (hdr->rsvd[0] || hdr->rsvd[1] || hdr->return_flags) { - dev_dbg(dev, "%s: Reserved/rflags populated\n", __func__); - rc = -EINVAL; - goto out; - } - - rc = do_ioctl(cfg, (void *)&buf); - if (likely(!rc)) - if (unlikely(copy_to_user(uarg, &buf, size))) { - dev_err(dev, "%s: copy_to_user() fail " - "size=%lu cmd=%d (%s) uarg=%p\n", - __func__, size, cmd, decode_hioctl(cmd), uarg); - rc = -EFAULT; - } - - /* fall through to exit */ - -out: - up_read(&cfg->ioctl_rwsem); - if (unlikely(rc && known_ioctl)) - dev_err(dev, "%s: ioctl %s (%08X) returned rc=%d\n", - __func__, decode_hioctl(cmd), cmd, rc); - else - dev_dbg(dev, "%s: ioctl %s (%08X) returned rc=%d\n", - __func__, decode_hioctl(cmd), cmd, rc); - return rc; -} - -/* - * Character device file operations - */ -static const struct file_operations cxlflash_chr_fops = { - .owner = THIS_MODULE, - .open = cxlflash_chr_open, - .unlocked_ioctl = cxlflash_chr_ioctl, - .compat_ioctl = compat_ptr_ioctl, -}; - -/** - * init_chrdev() - initialize the character device for the host - * @cfg: Internal structure associated with the host. - * - * Return: 0 on success, -errno on failure - */ -static int init_chrdev(struct cxlflash_cfg *cfg) -{ - struct device *dev = &cfg->dev->dev; - struct device *char_dev; - dev_t devno; - int minor; - int rc = 0; - - minor = cxlflash_get_minor(); - if (unlikely(minor < 0)) { - dev_err(dev, "%s: Exhausted allowed adapters\n", __func__); - rc = -ENOSPC; - goto out; - } - - devno = MKDEV(cxlflash_major, minor); - cdev_init(&cfg->cdev, &cxlflash_chr_fops); - - rc = cdev_add(&cfg->cdev, devno, 1); - if (rc) { - dev_err(dev, "%s: cdev_add failed rc=%d\n", __func__, rc); - goto err1; - } - - char_dev = device_create(&cxlflash_class, NULL, devno, - NULL, "cxlflash%d", minor); - if (IS_ERR(char_dev)) { - rc = PTR_ERR(char_dev); - dev_err(dev, "%s: device_create failed rc=%d\n", - __func__, rc); - goto err2; - } - - cfg->chardev = char_dev; -out: - dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc); - return rc; -err2: - cdev_del(&cfg->cdev); -err1: - cxlflash_put_minor(minor); - goto out; -} - -/** - * cxlflash_probe() - PCI entry point to add host - * @pdev: PCI device associated with the host. - * @dev_id: PCI device id associated with device. - * - * The device will initially start out in a 'probing' state and - * transition to the 'normal' state at the end of a successful - * probe. Should an EEH event occur during probe, the notification - * thread (error_detected()) will wait until the probe handler - * is nearly complete. At that time, the device will be moved to - * a 'probed' state and the EEH thread woken up to drive the slot - * reset and recovery (device moves to 'normal' state). Meanwhile, - * the probe will be allowed to exit successfully. - * - * Return: 0 on success, -errno on failure - */ -static int cxlflash_probe(struct pci_dev *pdev, - const struct pci_device_id *dev_id) -{ - struct Scsi_Host *host; - struct cxlflash_cfg *cfg = NULL; - struct device *dev = &pdev->dev; - struct dev_dependent_vals *ddv; - int rc = 0; - int k; - - dev_err_once(&pdev->dev, "DEPRECATION: cxlflash is deprecated and will be removed in a future kernel release\n"); - - dev_dbg(&pdev->dev, "%s: Found CXLFLASH with IRQ: %d\n", - __func__, pdev->irq); - - ddv = (struct dev_dependent_vals *)dev_id->driver_data; - driver_template.max_sectors = ddv->max_sectors; - - host = scsi_host_alloc(&driver_template, sizeof(struct cxlflash_cfg)); - if (!host) { - dev_err(dev, "%s: scsi_host_alloc failed\n", __func__); - rc = -ENOMEM; - goto out; - } - - host->max_id = CXLFLASH_MAX_NUM_TARGETS_PER_BUS; - host->max_lun = CXLFLASH_MAX_NUM_LUNS_PER_TARGET; - host->unique_id = host->host_no; - host->max_cmd_len = CXLFLASH_MAX_CDB_LEN; - - cfg = shost_priv(host); - cfg->state = STATE_PROBING; - cfg->host = host; - rc = alloc_mem(cfg); - if (rc) { - dev_err(dev, "%s: alloc_mem failed\n", __func__); - rc = -ENOMEM; - scsi_host_put(cfg->host); - goto out; - } - - cfg->init_state = INIT_STATE_NONE; - cfg->dev = pdev; - cfg->cxl_fops = cxlflash_cxl_fops; - cfg->ops = cxlflash_assign_ops(ddv); - WARN_ON_ONCE(!cfg->ops); - - /* - * Promoted LUNs move to the top of the LUN table. The rest stay on - * the bottom half. The bottom half grows from the end (index = 255), - * whereas the top half grows from the beginning (index = 0). - * - * Initialize the last LUN index for all possible ports. - */ - cfg->promote_lun_index = 0; - - for (k = 0; k < MAX_FC_PORTS; k++) - cfg->last_lun_index[k] = CXLFLASH_NUM_VLUNS/2 - 1; - - cfg->dev_id = (struct pci_device_id *)dev_id; - - init_waitqueue_head(&cfg->tmf_waitq); - init_waitqueue_head(&cfg->reset_waitq); - - INIT_WORK(&cfg->work_q, cxlflash_worker_thread); - cfg->lr_state = LINK_RESET_INVALID; - cfg->lr_port = -1; - spin_lock_init(&cfg->tmf_slock); - mutex_init(&cfg->ctx_tbl_list_mutex); - mutex_init(&cfg->ctx_recovery_mutex); - init_rwsem(&cfg->ioctl_rwsem); - INIT_LIST_HEAD(&cfg->ctx_err_recovery); - INIT_LIST_HEAD(&cfg->lluns); - - pci_set_drvdata(pdev, cfg); - - rc = init_pci(cfg); - if (rc) { - dev_err(dev, "%s: init_pci failed rc=%d\n", __func__, rc); - goto out_remove; - } - cfg->init_state = INIT_STATE_PCI; - - cfg->afu_cookie = cfg->ops->create_afu(pdev); - if (unlikely(!cfg->afu_cookie)) { - dev_err(dev, "%s: create_afu failed\n", __func__); - rc = -ENOMEM; - goto out_remove; - } - - rc = init_afu(cfg); - if (rc && !wq_has_sleeper(&cfg->reset_waitq)) { - dev_err(dev, "%s: init_afu failed rc=%d\n", __func__, rc); - goto out_remove; - } - cfg->init_state = INIT_STATE_AFU; - - rc = init_scsi(cfg); - if (rc) { - dev_err(dev, "%s: init_scsi failed rc=%d\n", __func__, rc); - goto out_remove; - } - cfg->init_state = INIT_STATE_SCSI; - - rc = init_chrdev(cfg); - if (rc) { - dev_err(dev, "%s: init_chrdev failed rc=%d\n", __func__, rc); - goto out_remove; - } - cfg->init_state = INIT_STATE_CDEV; - - if (wq_has_sleeper(&cfg->reset_waitq)) { - cfg->state = STATE_PROBED; - wake_up_all(&cfg->reset_waitq); - } else - cfg->state = STATE_NORMAL; -out: - dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc); - return rc; - -out_remove: - cfg->state = STATE_PROBED; - cxlflash_remove(pdev); - goto out; -} - -/** - * cxlflash_pci_error_detected() - called when a PCI error is detected - * @pdev: PCI device struct. - * @state: PCI channel state. - * - * When an EEH occurs during an active reset, wait until the reset is - * complete and then take action based upon the device state. - * - * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT - */ -static pci_ers_result_t cxlflash_pci_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) -{ - int rc = 0; - struct cxlflash_cfg *cfg = pci_get_drvdata(pdev); - struct device *dev = &cfg->dev->dev; - - dev_dbg(dev, "%s: pdev=%p state=%u\n", __func__, pdev, state); - - switch (state) { - case pci_channel_io_frozen: - wait_event(cfg->reset_waitq, cfg->state != STATE_RESET && - cfg->state != STATE_PROBING); - if (cfg->state == STATE_FAILTERM) - return PCI_ERS_RESULT_DISCONNECT; - - cfg->state = STATE_RESET; - scsi_block_requests(cfg->host); - drain_ioctls(cfg); - rc = cxlflash_mark_contexts_error(cfg); - if (unlikely(rc)) - dev_err(dev, "%s: Failed to mark user contexts rc=%d\n", - __func__, rc); - term_afu(cfg); - return PCI_ERS_RESULT_NEED_RESET; - case pci_channel_io_perm_failure: - cfg->state = STATE_FAILTERM; - wake_up_all(&cfg->reset_waitq); - scsi_unblock_requests(cfg->host); - return PCI_ERS_RESULT_DISCONNECT; - default: - break; - } - return PCI_ERS_RESULT_NEED_RESET; -} - -/** - * cxlflash_pci_slot_reset() - called when PCI slot has been reset - * @pdev: PCI device struct. - * - * This routine is called by the pci error recovery code after the PCI - * slot has been reset, just before we should resume normal operations. - * - * Return: PCI_ERS_RESULT_RECOVERED or PCI_ERS_RESULT_DISCONNECT - */ -static pci_ers_result_t cxlflash_pci_slot_reset(struct pci_dev *pdev) -{ - int rc = 0; - struct cxlflash_cfg *cfg = pci_get_drvdata(pdev); - struct device *dev = &cfg->dev->dev; - - dev_dbg(dev, "%s: pdev=%p\n", __func__, pdev); - - rc = init_afu(cfg); - if (unlikely(rc)) { - dev_err(dev, "%s: EEH recovery failed rc=%d\n", __func__, rc); - return PCI_ERS_RESULT_DISCONNECT; - } - - return PCI_ERS_RESULT_RECOVERED; -} - -/** - * cxlflash_pci_resume() - called when normal operation can resume - * @pdev: PCI device struct - */ -static void cxlflash_pci_resume(struct pci_dev *pdev) -{ - struct cxlflash_cfg *cfg = pci_get_drvdata(pdev); - struct device *dev = &cfg->dev->dev; - - dev_dbg(dev, "%s: pdev=%p\n", __func__, pdev); - - cfg->state = STATE_NORMAL; - wake_up_all(&cfg->reset_waitq); - scsi_unblock_requests(cfg->host); -} - -/** - * cxlflash_devnode() - provides devtmpfs for devices in the cxlflash class - * @dev: Character device. - * @mode: Mode that can be used to verify access. - * - * Return: Allocated string describing the devtmpfs structure. - */ -static char *cxlflash_devnode(const struct device *dev, umode_t *mode) -{ - return kasprintf(GFP_KERNEL, "cxlflash/%s", dev_name(dev)); -} - -/** - * cxlflash_class_init() - create character device class - * - * Return: 0 on success, -errno on failure - */ -static int cxlflash_class_init(void) -{ - dev_t devno; - int rc = 0; - - rc = alloc_chrdev_region(&devno, 0, CXLFLASH_MAX_ADAPTERS, "cxlflash"); - if (unlikely(rc)) { - pr_err("%s: alloc_chrdev_region failed rc=%d\n", __func__, rc); - goto out; - } - - cxlflash_major = MAJOR(devno); - - rc = class_register(&cxlflash_class); - if (rc) { - pr_err("%s: class_create failed rc=%d\n", __func__, rc); - goto err; - } - -out: - pr_debug("%s: returning rc=%d\n", __func__, rc); - return rc; -err: - unregister_chrdev_region(devno, CXLFLASH_MAX_ADAPTERS); - goto out; -} - -/** - * cxlflash_class_exit() - destroy character device class - */ -static void cxlflash_class_exit(void) -{ - dev_t devno = MKDEV(cxlflash_major, 0); - - class_unregister(&cxlflash_class); - unregister_chrdev_region(devno, CXLFLASH_MAX_ADAPTERS); -} - -static const struct pci_error_handlers cxlflash_err_handler = { - .error_detected = cxlflash_pci_error_detected, - .slot_reset = cxlflash_pci_slot_reset, - .resume = cxlflash_pci_resume, -}; - -/* - * PCI device structure - */ -static struct pci_driver cxlflash_driver = { - .name = CXLFLASH_NAME, - .id_table = cxlflash_pci_table, - .probe = cxlflash_probe, - .remove = cxlflash_remove, - .shutdown = cxlflash_remove, - .err_handler = &cxlflash_err_handler, -}; - -/** - * init_cxlflash() - module entry point - * - * Return: 0 on success, -errno on failure - */ -static int __init init_cxlflash(void) -{ - int rc; - - check_sizes(); - cxlflash_list_init(); - rc = cxlflash_class_init(); - if (unlikely(rc)) - goto out; - - rc = pci_register_driver(&cxlflash_driver); - if (unlikely(rc)) - goto err; -out: - pr_debug("%s: returning rc=%d\n", __func__, rc); - return rc; -err: - cxlflash_class_exit(); - goto out; -} - -/** - * exit_cxlflash() - module exit point - */ -static void __exit exit_cxlflash(void) -{ - cxlflash_term_global_luns(); - cxlflash_free_errpage(); - - pci_unregister_driver(&cxlflash_driver); - cxlflash_class_exit(); -} - -module_init(init_cxlflash); -module_exit(exit_cxlflash); diff --git a/drivers/scsi/cxlflash/main.h b/drivers/scsi/cxlflash/main.h deleted file mode 100644 index 0bfb98effce0..000000000000 --- a/drivers/scsi/cxlflash/main.h +++ /dev/null @@ -1,129 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * CXL Flash Device Driver - * - * Written by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>, IBM Corporation - * Matthew R. Ochs <mrochs@linux.vnet.ibm.com>, IBM Corporation - * - * Copyright (C) 2015 IBM Corporation - */ - -#ifndef _CXLFLASH_MAIN_H -#define _CXLFLASH_MAIN_H - -#include <linux/list.h> -#include <linux/types.h> -#include <scsi/scsi.h> -#include <scsi/scsi_device.h> - -#include "backend.h" - -#define CXLFLASH_NAME "cxlflash" -#define CXLFLASH_ADAPTER_NAME "IBM POWER CXL Flash Adapter" -#define CXLFLASH_MAX_ADAPTERS 32 - -#define PCI_DEVICE_ID_IBM_CORSA 0x04F0 -#define PCI_DEVICE_ID_IBM_FLASH_GT 0x0600 -#define PCI_DEVICE_ID_IBM_BRIARD 0x0624 - -/* Since there is only one target, make it 0 */ -#define CXLFLASH_TARGET 0 -#define CXLFLASH_MAX_CDB_LEN 16 - -/* Really only one target per bus since the Texan is directly attached */ -#define CXLFLASH_MAX_NUM_TARGETS_PER_BUS 1 -#define CXLFLASH_MAX_NUM_LUNS_PER_TARGET 65536 - -#define CXLFLASH_PCI_ERROR_RECOVERY_TIMEOUT (120 * HZ) - -/* FC defines */ -#define FC_MTIP_CMDCONFIG 0x010 -#define FC_MTIP_STATUS 0x018 -#define FC_MAX_NUM_LUNS 0x080 /* Max LUNs host can provision for port */ -#define FC_CUR_NUM_LUNS 0x088 /* Cur number LUNs provisioned for port */ -#define FC_MAX_CAP_PORT 0x090 /* Max capacity all LUNs for port (4K blocks) */ -#define FC_CUR_CAP_PORT 0x098 /* Cur capacity all LUNs for port (4K blocks) */ - -#define FC_PNAME 0x300 -#define FC_CONFIG 0x320 -#define FC_CONFIG2 0x328 -#define FC_STATUS 0x330 -#define FC_ERROR 0x380 -#define FC_ERRCAP 0x388 -#define FC_ERRMSK 0x390 -#define FC_CNT_CRCERR 0x538 -#define FC_CRC_THRESH 0x580 - -#define FC_MTIP_CMDCONFIG_ONLINE 0x20ULL -#define FC_MTIP_CMDCONFIG_OFFLINE 0x40ULL - -#define FC_MTIP_STATUS_MASK 0x30ULL -#define FC_MTIP_STATUS_ONLINE 0x20ULL -#define FC_MTIP_STATUS_OFFLINE 0x10ULL - -/* TIMEOUT and RETRY definitions */ - -/* AFU command timeout values */ -#define MC_AFU_SYNC_TIMEOUT 5 /* 5 secs */ -#define MC_LUN_PROV_TIMEOUT 5 /* 5 secs */ -#define MC_AFU_DEBUG_TIMEOUT 5 /* 5 secs */ - -/* AFU command room retry limit */ -#define MC_ROOM_RETRY_CNT 10 - -/* FC CRC clear periodic timer */ -#define MC_CRC_THRESH 100 /* threshold in 5 mins */ - -#define FC_PORT_STATUS_RETRY_CNT 100 /* 100 100ms retries = 10 seconds */ -#define FC_PORT_STATUS_RETRY_INTERVAL_US 100000 /* microseconds */ - -/* VPD defines */ -#define CXLFLASH_VPD_LEN 256 -#define WWPN_LEN 16 -#define WWPN_BUF_LEN (WWPN_LEN + 1) - -enum undo_level { - UNDO_NOOP = 0, - FREE_IRQ, - UNMAP_ONE, - UNMAP_TWO, - UNMAP_THREE -}; - -struct dev_dependent_vals { - u64 max_sectors; - u64 flags; -#define CXLFLASH_NOTIFY_SHUTDOWN 0x0000000000000001ULL -#define CXLFLASH_WWPN_VPD_REQUIRED 0x0000000000000002ULL -#define CXLFLASH_OCXL_DEV 0x0000000000000004ULL -}; - -static inline const struct cxlflash_backend_ops * -cxlflash_assign_ops(struct dev_dependent_vals *ddv) -{ - const struct cxlflash_backend_ops *ops = NULL; - -#ifdef CONFIG_OCXL_BASE - if (ddv->flags & CXLFLASH_OCXL_DEV) - ops = &cxlflash_ocxl_ops; -#endif - -#ifdef CONFIG_CXL_BASE - if (!(ddv->flags & CXLFLASH_OCXL_DEV)) - ops = &cxlflash_cxl_ops; -#endif - - return ops; -} - -struct asyc_intr_info { - u64 status; - char *desc; - u8 port; - u8 action; -#define CLR_FC_ERROR 0x01 -#define LINK_RESET 0x02 -#define SCAN_HOST 0x04 -}; - -#endif /* _CXLFLASH_MAIN_H */ diff --git a/drivers/scsi/cxlflash/ocxl_hw.c b/drivers/scsi/cxlflash/ocxl_hw.c deleted file mode 100644 index 6542818e595a..000000000000 --- a/drivers/scsi/cxlflash/ocxl_hw.c +++ /dev/null @@ -1,1399 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * CXL Flash Device Driver - * - * Written by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com>, IBM Corporation - * Uma Krishnan <ukrishn@linux.vnet.ibm.com>, IBM Corporation - * - * Copyright (C) 2018 IBM Corporation - */ - -#include <linux/file.h> -#include <linux/idr.h> -#include <linux/module.h> -#include <linux/mount.h> -#include <linux/pseudo_fs.h> -#include <linux/poll.h> -#include <linux/sched/signal.h> -#include <linux/interrupt.h> -#include <linux/irqdomain.h> -#include <asm/xive.h> -#include <misc/ocxl.h> - -#include <uapi/misc/cxl.h> - -#include "backend.h" -#include "ocxl_hw.h" - -/* - * Pseudo-filesystem to allocate inodes. - */ - -#define OCXLFLASH_FS_MAGIC 0x1697698f - -static int ocxlflash_fs_cnt; -static struct vfsmount *ocxlflash_vfs_mount; - -static int ocxlflash_fs_init_fs_context(struct fs_context *fc) -{ - return init_pseudo(fc, OCXLFLASH_FS_MAGIC) ? 0 : -ENOMEM; -} - -static struct file_system_type ocxlflash_fs_type = { - .name = "ocxlflash", - .owner = THIS_MODULE, - .init_fs_context = ocxlflash_fs_init_fs_context, - .kill_sb = kill_anon_super, -}; - -/* - * ocxlflash_release_mapping() - release the memory mapping - * @ctx: Context whose mapping is to be released. - */ -static void ocxlflash_release_mapping(struct ocxlflash_context *ctx) -{ - if (ctx->mapping) - simple_release_fs(&ocxlflash_vfs_mount, &ocxlflash_fs_cnt); - ctx->mapping = NULL; -} - -/* - * ocxlflash_getfile() - allocate pseudo filesystem, inode, and the file - * @dev: Generic device of the host. - * @name: Name of the pseudo filesystem. - * @fops: File operations. - * @priv: Private data. - * @flags: Flags for the file. - * - * Return: pointer to the file on success, ERR_PTR on failure - */ -static struct file *ocxlflash_getfile(struct device *dev, const char *name, - const struct file_operations *fops, - void *priv, int flags) -{ - struct file *file; - struct inode *inode; - int rc; - - if (fops->owner && !try_module_get(fops->owner)) { - dev_err(dev, "%s: Owner does not exist\n", __func__); - rc = -ENOENT; - goto err1; - } - - rc = simple_pin_fs(&ocxlflash_fs_type, &ocxlflash_vfs_mount, - &ocxlflash_fs_cnt); - if (unlikely(rc < 0)) { - dev_err(dev, "%s: Cannot mount ocxlflash pseudofs rc=%d\n", - __func__, rc); - goto err2; - } - - inode = alloc_anon_inode(ocxlflash_vfs_mount->mnt_sb); - if (IS_ERR(inode)) { - rc = PTR_ERR(inode); - dev_err(dev, "%s: alloc_anon_inode failed rc=%d\n", - __func__, rc); - goto err3; - } - - file = alloc_file_pseudo(inode, ocxlflash_vfs_mount, name, - flags & (O_ACCMODE | O_NONBLOCK), fops); - if (IS_ERR(file)) { - rc = PTR_ERR(file); - dev_err(dev, "%s: alloc_file failed rc=%d\n", - __func__, rc); - goto err4; - } - - file->private_data = priv; -out: - return file; -err4: - iput(inode); -err3: - simple_release_fs(&ocxlflash_vfs_mount, &ocxlflash_fs_cnt); -err2: - module_put(fops->owner); -err1: - file = ERR_PTR(rc); - goto out; -} - -/** - * ocxlflash_psa_map() - map the process specific MMIO space - * @ctx_cookie: Adapter context for which the mapping needs to be done. - * - * Return: MMIO pointer of the mapped region - */ -static void __iomem *ocxlflash_psa_map(void *ctx_cookie) -{ - struct ocxlflash_context *ctx = ctx_cookie; - struct device *dev = ctx->hw_afu->dev; - - mutex_lock(&ctx->state_mutex); - if (ctx->state != STARTED) { - dev_err(dev, "%s: Context not started, state=%d\n", __func__, - ctx->state); - mutex_unlock(&ctx->state_mutex); - return NULL; - } - mutex_unlock(&ctx->state_mutex); - - return ioremap(ctx->psn_phys, ctx->psn_size); -} - -/** - * ocxlflash_psa_unmap() - unmap the process specific MMIO space - * @addr: MMIO pointer to unmap. - */ -static void ocxlflash_psa_unmap(void __iomem *addr) -{ - iounmap(addr); -} - -/** - * ocxlflash_process_element() - get process element of the adapter context - * @ctx_cookie: Adapter context associated with the process element. - * - * Return: process element of the adapter context - */ -static int ocxlflash_process_element(void *ctx_cookie) -{ - struct ocxlflash_context *ctx = ctx_cookie; - - return ctx->pe; -} - -/** - * afu_map_irq() - map the interrupt of the adapter context - * @flags: Flags. - * @ctx: Adapter context. - * @num: Per-context AFU interrupt number. - * @handler: Interrupt handler to register. - * @cookie: Interrupt handler private data. - * @name: Name of the interrupt. - * - * Return: 0 on success, -errno on failure - */ -static int afu_map_irq(u64 flags, struct ocxlflash_context *ctx, int num, - irq_handler_t handler, void *cookie, char *name) -{ - struct ocxl_hw_afu *afu = ctx->hw_afu; - struct device *dev = afu->dev; - struct ocxlflash_irqs *irq; - struct xive_irq_data *xd; - u32 virq; - int rc = 0; - - if (num < 0 || num >= ctx->num_irqs) { - dev_err(dev, "%s: Interrupt %d not allocated\n", __func__, num); - rc = -ENOENT; - goto out; - } - - irq = &ctx->irqs[num]; - virq = irq_create_mapping(NULL, irq->hwirq); - if (unlikely(!virq)) { - dev_err(dev, "%s: irq_create_mapping failed\n", __func__); - rc = -ENOMEM; - goto out; - } - - rc = request_irq(virq, handler, 0, name, cookie); - if (unlikely(rc)) { - dev_err(dev, "%s: request_irq failed rc=%d\n", __func__, rc); - goto err1; - } - - xd = irq_get_handler_data(virq); - if (unlikely(!xd)) { - dev_err(dev, "%s: Can't get interrupt data\n", __func__); - rc = -ENXIO; - goto err2; - } - - irq->virq = virq; - irq->vtrig = xd->trig_mmio; -out: - return rc; -err2: - free_irq(virq, cookie); -err1: - irq_dispose_mapping(virq); - goto out; -} - -/** - * ocxlflash_map_afu_irq() - map the interrupt of the adapter context - * @ctx_cookie: Adapter context. - * @num: Per-context AFU interrupt number. - * @handler: Interrupt handler to register. - * @cookie: Interrupt handler private data. - * @name: Name of the interrupt. - * - * Return: 0 on success, -errno on failure - */ -static int ocxlflash_map_afu_irq(void *ctx_cookie, int num, - irq_handler_t handler, void *cookie, - char *name) -{ - return afu_map_irq(0, ctx_cookie, num, handler, cookie, name); -} - -/** - * afu_unmap_irq() - unmap the interrupt - * @flags: Flags. - * @ctx: Adapter context. - * @num: Per-context AFU interrupt number. - * @cookie: Interrupt handler private data. - */ -static void afu_unmap_irq(u64 flags, struct ocxlflash_context *ctx, int num, - void *cookie) -{ - struct ocxl_hw_afu *afu = ctx->hw_afu; - struct device *dev = afu->dev; - struct ocxlflash_irqs *irq; - - if (num < 0 || num >= ctx->num_irqs) { - dev_err(dev, "%s: Interrupt %d not allocated\n", __func__, num); - return; - } - - irq = &ctx->irqs[num]; - - if (irq_find_mapping(NULL, irq->hwirq)) { - free_irq(irq->virq, cookie); - irq_dispose_mapping(irq->virq); - } - - memset(irq, 0, sizeof(*irq)); -} - -/** - * ocxlflash_unmap_afu_irq() - unmap the interrupt - * @ctx_cookie: Adapter context. - * @num: Per-context AFU interrupt number. - * @cookie: Interrupt handler private data. - */ -static void ocxlflash_unmap_afu_irq(void *ctx_cookie, int num, void *cookie) -{ - return afu_unmap_irq(0, ctx_cookie, num, cookie); -} - -/** - * ocxlflash_get_irq_objhndl() - get the object handle for an interrupt - * @ctx_cookie: Context associated with the interrupt. - * @irq: Interrupt number. - * - * Return: effective address of the mapped region - */ -static u64 ocxlflash_get_irq_objhndl(void *ctx_cookie, int irq) -{ - struct ocxlflash_context *ctx = ctx_cookie; - - if (irq < 0 || irq >= ctx->num_irqs) - return 0; - - return (__force u64)ctx->irqs[irq].vtrig; -} - -/** - * ocxlflash_xsl_fault() - callback when translation error is triggered - * @data: Private data provided at callback registration, the context. - * @addr: Address that triggered the error. - * @dsisr: Value of dsisr register. - */ -static void ocxlflash_xsl_fault(void *data, u64 addr, u64 dsisr) -{ - struct ocxlflash_context *ctx = data; - - spin_lock(&ctx->slock); - ctx->fault_addr = addr; - ctx->fault_dsisr = dsisr; - ctx->pending_fault = true; - spin_unlock(&ctx->slock); - - wake_up_all(&ctx->wq); -} - -/** - * start_context() - local routine to start a context - * @ctx: Adapter context to be started. - * - * Assign the context specific MMIO space, add and enable the PE. - * - * Return: 0 on success, -errno on failure - */ -static int start_context(struct ocxlflash_context *ctx) -{ - struct ocxl_hw_afu *afu = ctx->hw_afu; - struct ocxl_afu_config *acfg = &afu->acfg; - void *link_token = afu->link_token; - struct pci_dev *pdev = afu->pdev; - struct device *dev = afu->dev; - bool master = ctx->master; - struct mm_struct *mm; - int rc = 0; - u32 pid; - - mutex_lock(&ctx->state_mutex); - if (ctx->state != OPENED) { - dev_err(dev, "%s: Context state invalid, state=%d\n", - __func__, ctx->state); - rc = -EINVAL; - goto out; - } - - if (master) { - ctx->psn_size = acfg->global_mmio_size; - ctx->psn_phys = afu->gmmio_phys; - } else { - ctx->psn_size = acfg->pp_mmio_stride; - ctx->psn_phys = afu->ppmmio_phys + (ctx->pe * ctx->psn_size); - } - - /* pid and mm not set for master contexts */ - if (master) { - pid = 0; - mm = NULL; - } else { - pid = current->mm->context.id; - mm = current->mm; - } - - rc = ocxl_link_add_pe(link_token, ctx->pe, pid, 0, 0, - pci_dev_id(pdev), mm, ocxlflash_xsl_fault, - ctx); - if (unlikely(rc)) { - dev_err(dev, "%s: ocxl_link_add_pe failed rc=%d\n", - __func__, rc); - goto out; - } - - ctx->state = STARTED; -out: - mutex_unlock(&ctx->state_mutex); - return rc; -} - -/** - * ocxlflash_start_context() - start a kernel context - * @ctx_cookie: Adapter context to be started. - * - * Return: 0 on success, -errno on failure - */ -static int ocxlflash_start_context(void *ctx_cookie) -{ - struct ocxlflash_context *ctx = ctx_cookie; - - return start_context(ctx); -} - -/** - * ocxlflash_stop_context() - stop a context - * @ctx_cookie: Adapter context to be stopped. - * - * Return: 0 on success, -errno on failure - */ -static int ocxlflash_stop_context(void *ctx_cookie) -{ - struct ocxlflash_context *ctx = ctx_cookie; - struct ocxl_hw_afu *afu = ctx->hw_afu; - struct ocxl_afu_config *acfg = &afu->acfg; - struct pci_dev *pdev = afu->pdev; - struct device *dev = afu->dev; - enum ocxlflash_ctx_state state; - int rc = 0; - - mutex_lock(&ctx->state_mutex); - state = ctx->state; - ctx->state = CLOSED; - mutex_unlock(&ctx->state_mutex); - if (state != STARTED) - goto out; - - rc = ocxl_config_terminate_pasid(pdev, acfg->dvsec_afu_control_pos, - ctx->pe); - if (unlikely(rc)) { - dev_err(dev, "%s: ocxl_config_terminate_pasid failed rc=%d\n", - __func__, rc); - /* If EBUSY, PE could be referenced in future by the AFU */ - if (rc == -EBUSY) - goto out; - } - - rc = ocxl_link_remove_pe(afu->link_token, ctx->pe); - if (unlikely(rc)) { - dev_err(dev, "%s: ocxl_link_remove_pe failed rc=%d\n", - __func__, rc); - goto out; - } -out: - return rc; -} - -/** - * ocxlflash_afu_reset() - reset the AFU - * @ctx_cookie: Adapter context. - */ -static int ocxlflash_afu_reset(void *ctx_cookie) -{ - struct ocxlflash_context *ctx = ctx_cookie; - struct device *dev = ctx->hw_afu->dev; - - /* Pending implementation from OCXL transport services */ - dev_err_once(dev, "%s: afu_reset() fop not supported\n", __func__); - - /* Silently return success until it is implemented */ - return 0; -} - -/** - * ocxlflash_set_master() - sets the context as master - * @ctx_cookie: Adapter context to set as master. - */ -static void ocxlflash_set_master(void *ctx_cookie) -{ - struct ocxlflash_context *ctx = ctx_cookie; - - ctx->master = true; -} - -/** - * ocxlflash_get_context() - obtains the context associated with the host - * @pdev: PCI device associated with the host. - * @afu_cookie: Hardware AFU associated with the host. - * - * Return: returns the pointer to host adapter context - */ -static void *ocxlflash_get_context(struct pci_dev *pdev, void *afu_cookie) -{ - struct ocxl_hw_afu *afu = afu_cookie; - - return afu->ocxl_ctx; -} - -/** - * ocxlflash_dev_context_init() - allocate and initialize an adapter context - * @pdev: PCI device associated with the host. - * @afu_cookie: Hardware AFU associated with the host. - * - * Return: returns the adapter context on success, ERR_PTR on failure - */ -static void *ocxlflash_dev_context_init(struct pci_dev *pdev, void *afu_cookie) -{ - struct ocxl_hw_afu *afu = afu_cookie; - struct device *dev = afu->dev; - struct ocxlflash_context *ctx; - int rc; - - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (unlikely(!ctx)) { - dev_err(dev, "%s: Context allocation failed\n", __func__); - rc = -ENOMEM; - goto err1; - } - - idr_preload(GFP_KERNEL); - rc = idr_alloc(&afu->idr, ctx, 0, afu->max_pasid, GFP_NOWAIT); - idr_preload_end(); - if (unlikely(rc < 0)) { - dev_err(dev, "%s: idr_alloc failed rc=%d\n", __func__, rc); - goto err2; - } - - spin_lock_init(&ctx->slock); - init_waitqueue_head(&ctx->wq); - mutex_init(&ctx->state_mutex); - - ctx->state = OPENED; - ctx->pe = rc; - ctx->master = false; - ctx->mapping = NULL; - ctx->hw_afu = afu; - ctx->irq_bitmap = 0; - ctx->pending_irq = false; - ctx->pending_fault = false; -out: - return ctx; -err2: - kfree(ctx); -err1: - ctx = ERR_PTR(rc); - goto out; -} - -/** - * ocxlflash_release_context() - releases an adapter context - * @ctx_cookie: Adapter context to be released. - * - * Return: 0 on success, -errno on failure - */ -static int ocxlflash_release_context(void *ctx_cookie) -{ - struct ocxlflash_context *ctx = ctx_cookie; - struct device *dev; - int rc = 0; - - if (!ctx) - goto out; - - dev = ctx->hw_afu->dev; - mutex_lock(&ctx->state_mutex); - if (ctx->state >= STARTED) { - dev_err(dev, "%s: Context in use, state=%d\n", __func__, - ctx->state); - mutex_unlock(&ctx->state_mutex); - rc = -EBUSY; - goto out; - } - mutex_unlock(&ctx->state_mutex); - - idr_remove(&ctx->hw_afu->idr, ctx->pe); - ocxlflash_release_mapping(ctx); - kfree(ctx); -out: - return rc; -} - -/** - * ocxlflash_perst_reloads_same_image() - sets the image reload policy - * @afu_cookie: Hardware AFU associated with the host. - * @image: Whether to load the same image on PERST. - */ -static void ocxlflash_perst_reloads_same_image(void *afu_cookie, bool image) -{ - struct ocxl_hw_afu *afu = afu_cookie; - - afu->perst_same_image = image; -} - -/** - * ocxlflash_read_adapter_vpd() - reads the adapter VPD - * @pdev: PCI device associated with the host. - * @buf: Buffer to get the VPD data. - * @count: Size of buffer (maximum bytes that can be read). - * - * Return: size of VPD on success, -errno on failure - */ -static ssize_t ocxlflash_read_adapter_vpd(struct pci_dev *pdev, void *buf, - size_t count) -{ - return pci_read_vpd(pdev, 0, count, buf); -} - -/** - * free_afu_irqs() - internal service to free interrupts - * @ctx: Adapter context. - */ -static void free_afu_irqs(struct ocxlflash_context *ctx) -{ - struct ocxl_hw_afu *afu = ctx->hw_afu; - struct device *dev = afu->dev; - int i; - - if (!ctx->irqs) { - dev_err(dev, "%s: Interrupts not allocated\n", __func__); - return; - } - - for (i = ctx->num_irqs; i >= 0; i--) - ocxl_link_free_irq(afu->link_token, ctx->irqs[i].hwirq); - - kfree(ctx->irqs); - ctx->irqs = NULL; -} - -/** - * alloc_afu_irqs() - internal service to allocate interrupts - * @ctx: Context associated with the request. - * @num: Number of interrupts requested. - * - * Return: 0 on success, -errno on failure - */ -static int alloc_afu_irqs(struct ocxlflash_context *ctx, int num) -{ - struct ocxl_hw_afu *afu = ctx->hw_afu; - struct device *dev = afu->dev; - struct ocxlflash_irqs *irqs; - int rc = 0; - int hwirq; - int i; - - if (ctx->irqs) { - dev_err(dev, "%s: Interrupts already allocated\n", __func__); - rc = -EEXIST; - goto out; - } - - if (num > OCXL_MAX_IRQS) { - dev_err(dev, "%s: Too many interrupts num=%d\n", __func__, num); - rc = -EINVAL; - goto out; - } - - irqs = kcalloc(num, sizeof(*irqs), GFP_KERNEL); - if (unlikely(!irqs)) { - dev_err(dev, "%s: Context irqs allocation failed\n", __func__); - rc = -ENOMEM; - goto out; - } - - for (i = 0; i < num; i++) { - rc = ocxl_link_irq_alloc(afu->link_token, &hwirq); - if (unlikely(rc)) { - dev_err(dev, "%s: ocxl_link_irq_alloc failed rc=%d\n", - __func__, rc); - goto err; - } - - irqs[i].hwirq = hwirq; - } - - ctx->irqs = irqs; - ctx->num_irqs = num; -out: - return rc; -err: - for (i = i-1; i >= 0; i--) - ocxl_link_free_irq(afu->link_token, irqs[i].hwirq); - kfree(irqs); - goto out; -} - -/** - * ocxlflash_allocate_afu_irqs() - allocates the requested number of interrupts - * @ctx_cookie: Context associated with the request. - * @num: Number of interrupts requested. - * - * Return: 0 on success, -errno on failure - */ -static int ocxlflash_allocate_afu_irqs(void *ctx_cookie, int num) -{ - return alloc_afu_irqs(ctx_cookie, num); -} - -/** - * ocxlflash_free_afu_irqs() - frees the interrupts of an adapter context - * @ctx_cookie: Adapter context. - */ -static void ocxlflash_free_afu_irqs(void *ctx_cookie) -{ - free_afu_irqs(ctx_cookie); -} - -/** - * ocxlflash_unconfig_afu() - unconfigure the AFU - * @afu: AFU associated with the host. - */ -static void ocxlflash_unconfig_afu(struct ocxl_hw_afu *afu) -{ - if (afu->gmmio_virt) { - iounmap(afu->gmmio_virt); - afu->gmmio_virt = NULL; - } -} - -/** - * ocxlflash_destroy_afu() - destroy the AFU structure - * @afu_cookie: AFU to be freed. - */ -static void ocxlflash_destroy_afu(void *afu_cookie) -{ - struct ocxl_hw_afu *afu = afu_cookie; - int pos; - - if (!afu) - return; - - ocxlflash_release_context(afu->ocxl_ctx); - idr_destroy(&afu->idr); - - /* Disable the AFU */ - pos = afu->acfg.dvsec_afu_control_pos; - ocxl_config_set_afu_state(afu->pdev, pos, 0); - - ocxlflash_unconfig_afu(afu); - kfree(afu); -} - -/** - * ocxlflash_config_fn() - configure the host function - * @pdev: PCI device associated with the host. - * @afu: AFU associated with the host. - * - * Return: 0 on success, -errno on failure - */ -static int ocxlflash_config_fn(struct pci_dev *pdev, struct ocxl_hw_afu *afu) -{ - struct ocxl_fn_config *fcfg = &afu->fcfg; - struct device *dev = &pdev->dev; - u16 base, enabled, supported; - int rc = 0; - - /* Read DVSEC config of the function */ - rc = ocxl_config_read_function(pdev, fcfg); - if (unlikely(rc)) { - dev_err(dev, "%s: ocxl_config_read_function failed rc=%d\n", - __func__, rc); - goto out; - } - - /* Check if function has AFUs defined, only 1 per function supported */ - if (fcfg->max_afu_index >= 0) { - afu->is_present = true; - if (fcfg->max_afu_index != 0) - dev_warn(dev, "%s: Unexpected AFU index value %d\n", - __func__, fcfg->max_afu_index); - } - - rc = ocxl_config_get_actag_info(pdev, &base, &enabled, &supported); - if (unlikely(rc)) { - dev_err(dev, "%s: ocxl_config_get_actag_info failed rc=%d\n", - __func__, rc); - goto out; - } - - afu->fn_actag_base = base; - afu->fn_actag_enabled = enabled; - - ocxl_config_set_actag(pdev, fcfg->dvsec_function_pos, base, enabled); - dev_dbg(dev, "%s: Function acTag range base=%u enabled=%u\n", - __func__, base, enabled); - - rc = ocxl_link_setup(pdev, 0, &afu->link_token); - if (unlikely(rc)) { - dev_err(dev, "%s: ocxl_link_setup failed rc=%d\n", - __func__, rc); - goto out; - } - - rc = ocxl_config_set_TL(pdev, fcfg->dvsec_tl_pos); - if (unlikely(rc)) { - dev_err(dev, "%s: ocxl_config_set_TL failed rc=%d\n", - __func__, rc); - goto err; - } -out: - return rc; -err: - ocxl_link_release(pdev, afu->link_token); - goto out; -} - -/** - * ocxlflash_unconfig_fn() - unconfigure the host function - * @pdev: PCI device associated with the host. - * @afu: AFU associated with the host. - */ -static void ocxlflash_unconfig_fn(struct pci_dev *pdev, struct ocxl_hw_afu *afu) -{ - ocxl_link_release(pdev, afu->link_token); -} - -/** - * ocxlflash_map_mmio() - map the AFU MMIO space - * @afu: AFU associated with the host. - * - * Return: 0 on success, -errno on failure - */ -static int ocxlflash_map_mmio(struct ocxl_hw_afu *afu) -{ - struct ocxl_afu_config *acfg = &afu->acfg; - struct pci_dev *pdev = afu->pdev; - struct device *dev = afu->dev; - phys_addr_t gmmio, ppmmio; - int rc = 0; - - rc = pci_request_region(pdev, acfg->global_mmio_bar, "ocxlflash"); - if (unlikely(rc)) { - dev_err(dev, "%s: pci_request_region for global failed rc=%d\n", - __func__, rc); - goto out; - } - gmmio = pci_resource_start(pdev, acfg->global_mmio_bar); - gmmio += acfg->global_mmio_offset; - - rc = pci_request_region(pdev, acfg->pp_mmio_bar, "ocxlflash"); - if (unlikely(rc)) { - dev_err(dev, "%s: pci_request_region for pp bar failed rc=%d\n", - __func__, rc); - goto err1; - } - ppmmio = pci_resource_start(pdev, acfg->pp_mmio_bar); - ppmmio += acfg->pp_mmio_offset; - - afu->gmmio_virt = ioremap(gmmio, acfg->global_mmio_size); - if (unlikely(!afu->gmmio_virt)) { - dev_err(dev, "%s: MMIO mapping failed\n", __func__); - rc = -ENOMEM; - goto err2; - } - - afu->gmmio_phys = gmmio; - afu->ppmmio_phys = ppmmio; -out: - return rc; -err2: - pci_release_region(pdev, acfg->pp_mmio_bar); -err1: - pci_release_region(pdev, acfg->global_mmio_bar); - goto out; -} - -/** - * ocxlflash_config_afu() - configure the host AFU - * @pdev: PCI device associated with the host. - * @afu: AFU associated with the host. - * - * Must be called _after_ host function configuration. - * - * Return: 0 on success, -errno on failure - */ -static int ocxlflash_config_afu(struct pci_dev *pdev, struct ocxl_hw_afu *afu) -{ - struct ocxl_afu_config *acfg = &afu->acfg; - struct ocxl_fn_config *fcfg = &afu->fcfg; - struct device *dev = &pdev->dev; - int count; - int base; - int pos; - int rc = 0; - - /* This HW AFU function does not have any AFUs defined */ - if (!afu->is_present) - goto out; - - /* Read AFU config at index 0 */ - rc = ocxl_config_read_afu(pdev, fcfg, acfg, 0); - if (unlikely(rc)) { - dev_err(dev, "%s: ocxl_config_read_afu failed rc=%d\n", - __func__, rc); - goto out; - } - - /* Only one AFU per function is supported, so actag_base is same */ - base = afu->fn_actag_base; - count = min_t(int, acfg->actag_supported, afu->fn_actag_enabled); - pos = acfg->dvsec_afu_control_pos; - - ocxl_config_set_afu_actag(pdev, pos, base, count); - dev_dbg(dev, "%s: acTag base=%d enabled=%d\n", __func__, base, count); - afu->afu_actag_base = base; - afu->afu_actag_enabled = count; - afu->max_pasid = 1 << acfg->pasid_supported_log; - - ocxl_config_set_afu_pasid(pdev, pos, 0, acfg->pasid_supported_log); - - rc = ocxlflash_map_mmio(afu); - if (unlikely(rc)) { - dev_err(dev, "%s: ocxlflash_map_mmio failed rc=%d\n", - __func__, rc); - goto out; - } - - /* Enable the AFU */ - ocxl_config_set_afu_state(pdev, acfg->dvsec_afu_control_pos, 1); -out: - return rc; -} - -/** - * ocxlflash_create_afu() - create the AFU for OCXL - * @pdev: PCI device associated with the host. - * - * Return: AFU on success, NULL on failure - */ -static void *ocxlflash_create_afu(struct pci_dev *pdev) -{ - struct device *dev = &pdev->dev; - struct ocxlflash_context *ctx; - struct ocxl_hw_afu *afu; - int rc; - - afu = kzalloc(sizeof(*afu), GFP_KERNEL); - if (unlikely(!afu)) { - dev_err(dev, "%s: HW AFU allocation failed\n", __func__); - goto out; - } - - afu->pdev = pdev; - afu->dev = dev; - idr_init(&afu->idr); - - rc = ocxlflash_config_fn(pdev, afu); - if (unlikely(rc)) { - dev_err(dev, "%s: Function configuration failed rc=%d\n", - __func__, rc); - goto err1; - } - - rc = ocxlflash_config_afu(pdev, afu); - if (unlikely(rc)) { - dev_err(dev, "%s: AFU configuration failed rc=%d\n", - __func__, rc); - goto err2; - } - - ctx = ocxlflash_dev_context_init(pdev, afu); - if (IS_ERR(ctx)) { - rc = PTR_ERR(ctx); - dev_err(dev, "%s: ocxlflash_dev_context_init failed rc=%d\n", - __func__, rc); - goto err3; - } - - afu->ocxl_ctx = ctx; -out: - return afu; -err3: - ocxlflash_unconfig_afu(afu); -err2: - ocxlflash_unconfig_fn(pdev, afu); -err1: - idr_destroy(&afu->idr); - kfree(afu); - afu = NULL; - goto out; -} - -/** - * ctx_event_pending() - check for any event pending on the context - * @ctx: Context to be checked. - * - * Return: true if there is an event pending, false if none pending - */ -static inline bool ctx_event_pending(struct ocxlflash_context *ctx) -{ - if (ctx->pending_irq || ctx->pending_fault) - return true; - - return false; -} - -/** - * afu_poll() - poll the AFU for events on the context - * @file: File associated with the adapter context. - * @poll: Poll structure from the user. - * - * Return: poll mask - */ -static unsigned int afu_poll(struct file *file, struct poll_table_struct *poll) -{ - struct ocxlflash_context *ctx = file->private_data; - struct device *dev = ctx->hw_afu->dev; - ulong lock_flags; - int mask = 0; - - poll_wait(file, &ctx->wq, poll); - - spin_lock_irqsave(&ctx->slock, lock_flags); - if (ctx_event_pending(ctx)) - mask |= POLLIN | POLLRDNORM; - else if (ctx->state == CLOSED) - mask |= POLLERR; - spin_unlock_irqrestore(&ctx->slock, lock_flags); - - dev_dbg(dev, "%s: Poll wait completed for pe %i mask %i\n", - __func__, ctx->pe, mask); - - return mask; -} - -/** - * afu_read() - perform a read on the context for any event - * @file: File associated with the adapter context. - * @buf: Buffer to receive the data. - * @count: Size of buffer (maximum bytes that can be read). - * @off: Offset. - * - * Return: size of the data read on success, -errno on failure - */ -static ssize_t afu_read(struct file *file, char __user *buf, size_t count, - loff_t *off) -{ - struct ocxlflash_context *ctx = file->private_data; - struct device *dev = ctx->hw_afu->dev; - struct cxl_event event; - ulong lock_flags; - ssize_t esize; - ssize_t rc; - int bit; - DEFINE_WAIT(event_wait); - - if (*off != 0) { - dev_err(dev, "%s: Non-zero offset not supported, off=%lld\n", - __func__, *off); - rc = -EINVAL; - goto out; - } - - spin_lock_irqsave(&ctx->slock, lock_flags); - - for (;;) { - prepare_to_wait(&ctx->wq, &event_wait, TASK_INTERRUPTIBLE); - - if (ctx_event_pending(ctx) || (ctx->state == CLOSED)) - break; - - if (file->f_flags & O_NONBLOCK) { - dev_err(dev, "%s: File cannot be blocked on I/O\n", - __func__); - rc = -EAGAIN; - goto err; - } - - if (signal_pending(current)) { - dev_err(dev, "%s: Signal pending on the process\n", - __func__); - rc = -ERESTARTSYS; - goto err; - } - - spin_unlock_irqrestore(&ctx->slock, lock_flags); - schedule(); - spin_lock_irqsave(&ctx->slock, lock_flags); - } - - finish_wait(&ctx->wq, &event_wait); - - memset(&event, 0, sizeof(event)); - event.header.process_element = ctx->pe; - event.header.size = sizeof(struct cxl_event_header); - if (ctx->pending_irq) { - esize = sizeof(struct cxl_event_afu_interrupt); - event.header.size += esize; - event.header.type = CXL_EVENT_AFU_INTERRUPT; - - bit = find_first_bit(&ctx->irq_bitmap, ctx->num_irqs); - clear_bit(bit, &ctx->irq_bitmap); - event.irq.irq = bit + 1; - if (bitmap_empty(&ctx->irq_bitmap, ctx->num_irqs)) - ctx->pending_irq = false; - } else if (ctx->pending_fault) { - event.header.size += sizeof(struct cxl_event_data_storage); - event.header.type = CXL_EVENT_DATA_STORAGE; - event.fault.addr = ctx->fault_addr; - event.fault.dsisr = ctx->fault_dsisr; - ctx->pending_fault = false; - } - - spin_unlock_irqrestore(&ctx->slock, lock_flags); - - if (copy_to_user(buf, &event, event.header.size)) { - dev_err(dev, "%s: copy_to_user failed\n", __func__); - rc = -EFAULT; - goto out; - } - - rc = event.header.size; -out: - return rc; -err: - finish_wait(&ctx->wq, &event_wait); - spin_unlock_irqrestore(&ctx->slock, lock_flags); - goto out; -} - -/** - * afu_release() - release and free the context - * @inode: File inode pointer. - * @file: File associated with the context. - * - * Return: 0 on success, -errno on failure - */ -static int afu_release(struct inode *inode, struct file *file) -{ - struct ocxlflash_context *ctx = file->private_data; - int i; - - /* Unmap and free the interrupts associated with the context */ - for (i = ctx->num_irqs; i >= 0; i--) - afu_unmap_irq(0, ctx, i, ctx); - free_afu_irqs(ctx); - - return ocxlflash_release_context(ctx); -} - -/** - * ocxlflash_mmap_fault() - mmap fault handler - * @vmf: VM fault associated with current fault. - * - * Return: 0 on success, -errno on failure - */ -static vm_fault_t ocxlflash_mmap_fault(struct vm_fault *vmf) -{ - struct vm_area_struct *vma = vmf->vma; - struct ocxlflash_context *ctx = vma->vm_file->private_data; - struct device *dev = ctx->hw_afu->dev; - u64 mmio_area, offset; - - offset = vmf->pgoff << PAGE_SHIFT; - if (offset >= ctx->psn_size) - return VM_FAULT_SIGBUS; - - mutex_lock(&ctx->state_mutex); - if (ctx->state != STARTED) { - dev_err(dev, "%s: Context not started, state=%d\n", - __func__, ctx->state); - mutex_unlock(&ctx->state_mutex); - return VM_FAULT_SIGBUS; - } - mutex_unlock(&ctx->state_mutex); - - mmio_area = ctx->psn_phys; - mmio_area += offset; - - return vmf_insert_pfn(vma, vmf->address, mmio_area >> PAGE_SHIFT); -} - -static const struct vm_operations_struct ocxlflash_vmops = { - .fault = ocxlflash_mmap_fault, -}; - -/** - * afu_mmap() - map the fault handler operations - * @file: File associated with the context. - * @vma: VM area associated with mapping. - * - * Return: 0 on success, -errno on failure - */ -static int afu_mmap(struct file *file, struct vm_area_struct *vma) -{ - struct ocxlflash_context *ctx = file->private_data; - - if ((vma_pages(vma) + vma->vm_pgoff) > - (ctx->psn_size >> PAGE_SHIFT)) - return -EINVAL; - - vm_flags_set(vma, VM_IO | VM_PFNMAP); - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_ops = &ocxlflash_vmops; - return 0; -} - -static const struct file_operations ocxl_afu_fops = { - .owner = THIS_MODULE, - .poll = afu_poll, - .read = afu_read, - .release = afu_release, - .mmap = afu_mmap, -}; - -#define PATCH_FOPS(NAME) \ - do { if (!fops->NAME) fops->NAME = ocxl_afu_fops.NAME; } while (0) - -/** - * ocxlflash_get_fd() - get file descriptor for an adapter context - * @ctx_cookie: Adapter context. - * @fops: File operations to be associated. - * @fd: File descriptor to be returned back. - * - * Return: pointer to the file on success, ERR_PTR on failure - */ -static struct file *ocxlflash_get_fd(void *ctx_cookie, - struct file_operations *fops, int *fd) -{ - struct ocxlflash_context *ctx = ctx_cookie; - struct device *dev = ctx->hw_afu->dev; - struct file *file; - int flags, fdtmp; - int rc = 0; - char *name = NULL; - - /* Only allow one fd per context */ - if (ctx->mapping) { - dev_err(dev, "%s: Context is already mapped to an fd\n", - __func__); - rc = -EEXIST; - goto err1; - } - - flags = O_RDWR | O_CLOEXEC; - - /* This code is similar to anon_inode_getfd() */ - rc = get_unused_fd_flags(flags); - if (unlikely(rc < 0)) { - dev_err(dev, "%s: get_unused_fd_flags failed rc=%d\n", - __func__, rc); - goto err1; - } - fdtmp = rc; - - /* Patch the file ops that are not defined */ - if (fops) { - PATCH_FOPS(poll); - PATCH_FOPS(read); - PATCH_FOPS(release); - PATCH_FOPS(mmap); - } else /* Use default ops */ - fops = (struct file_operations *)&ocxl_afu_fops; - - name = kasprintf(GFP_KERNEL, "ocxlflash:%d", ctx->pe); - file = ocxlflash_getfile(dev, name, fops, ctx, flags); - kfree(name); - if (IS_ERR(file)) { - rc = PTR_ERR(file); - dev_err(dev, "%s: ocxlflash_getfile failed rc=%d\n", - __func__, rc); - goto err2; - } - - ctx->mapping = file->f_mapping; - *fd = fdtmp; -out: - return file; -err2: - put_unused_fd(fdtmp); -err1: - file = ERR_PTR(rc); - goto out; -} - -/** - * ocxlflash_fops_get_context() - get the context associated with the file - * @file: File associated with the adapter context. - * - * Return: pointer to the context - */ -static void *ocxlflash_fops_get_context(struct file *file) -{ - return file->private_data; -} - -/** - * ocxlflash_afu_irq() - interrupt handler for user contexts - * @irq: Interrupt number. - * @data: Private data provided at interrupt registration, the context. - * - * Return: Always return IRQ_HANDLED. - */ -static irqreturn_t ocxlflash_afu_irq(int irq, void *data) -{ - struct ocxlflash_context *ctx = data; - struct device *dev = ctx->hw_afu->dev; - int i; - - dev_dbg(dev, "%s: Interrupt raised for pe %i virq %i\n", - __func__, ctx->pe, irq); - - for (i = 0; i < ctx->num_irqs; i++) { - if (ctx->irqs[i].virq == irq) - break; - } - if (unlikely(i >= ctx->num_irqs)) { - dev_err(dev, "%s: Received AFU IRQ out of range\n", __func__); - goto out; - } - - spin_lock(&ctx->slock); - set_bit(i - 1, &ctx->irq_bitmap); - ctx->pending_irq = true; - spin_unlock(&ctx->slock); - - wake_up_all(&ctx->wq); -out: - return IRQ_HANDLED; -} - -/** - * ocxlflash_start_work() - start a user context - * @ctx_cookie: Context to be started. - * @num_irqs: Number of interrupts requested. - * - * Return: 0 on success, -errno on failure - */ -static int ocxlflash_start_work(void *ctx_cookie, u64 num_irqs) -{ - struct ocxlflash_context *ctx = ctx_cookie; - struct ocxl_hw_afu *afu = ctx->hw_afu; - struct device *dev = afu->dev; - char *name; - int rc = 0; - int i; - - rc = alloc_afu_irqs(ctx, num_irqs); - if (unlikely(rc < 0)) { - dev_err(dev, "%s: alloc_afu_irqs failed rc=%d\n", __func__, rc); - goto out; - } - - for (i = 0; i < num_irqs; i++) { - name = kasprintf(GFP_KERNEL, "ocxlflash-%s-pe%i-%i", - dev_name(dev), ctx->pe, i); - rc = afu_map_irq(0, ctx, i, ocxlflash_afu_irq, ctx, name); - kfree(name); - if (unlikely(rc < 0)) { - dev_err(dev, "%s: afu_map_irq failed rc=%d\n", - __func__, rc); - goto err; - } - } - - rc = start_context(ctx); - if (unlikely(rc)) { - dev_err(dev, "%s: start_context failed rc=%d\n", __func__, rc); - goto err; - } -out: - return rc; -err: - for (i = i-1; i >= 0; i--) - afu_unmap_irq(0, ctx, i, ctx); - free_afu_irqs(ctx); - goto out; -}; - -/** - * ocxlflash_fd_mmap() - mmap handler for adapter file descriptor - * @file: File installed with adapter file descriptor. - * @vma: VM area associated with mapping. - * - * Return: 0 on success, -errno on failure - */ -static int ocxlflash_fd_mmap(struct file *file, struct vm_area_struct *vma) -{ - return afu_mmap(file, vma); -} - -/** - * ocxlflash_fd_release() - release the context associated with the file - * @inode: File inode pointer. - * @file: File associated with the adapter context. - * - * Return: 0 on success, -errno on failure - */ -static int ocxlflash_fd_release(struct inode *inode, struct file *file) -{ - return afu_release(inode, file); -} - -/* Backend ops to ocxlflash services */ -const struct cxlflash_backend_ops cxlflash_ocxl_ops = { - .module = THIS_MODULE, - .psa_map = ocxlflash_psa_map, - .psa_unmap = ocxlflash_psa_unmap, - .process_element = ocxlflash_process_element, - .map_afu_irq = ocxlflash_map_afu_irq, - .unmap_afu_irq = ocxlflash_unmap_afu_irq, - .get_irq_objhndl = ocxlflash_get_irq_objhndl, - .start_context = ocxlflash_start_context, - .stop_context = ocxlflash_stop_context, - .afu_reset = ocxlflash_afu_reset, - .set_master = ocxlflash_set_master, - .get_context = ocxlflash_get_context, - .dev_context_init = ocxlflash_dev_context_init, - .release_context = ocxlflash_release_context, - .perst_reloads_same_image = ocxlflash_perst_reloads_same_image, - .read_adapter_vpd = ocxlflash_read_adapter_vpd, - .allocate_afu_irqs = ocxlflash_allocate_afu_irqs, - .free_afu_irqs = ocxlflash_free_afu_irqs, - .create_afu = ocxlflash_create_afu, - .destroy_afu = ocxlflash_destroy_afu, - .get_fd = ocxlflash_get_fd, - .fops_get_context = ocxlflash_fops_get_context, - .start_work = ocxlflash_start_work, - .fd_mmap = ocxlflash_fd_mmap, - .fd_release = ocxlflash_fd_release, -}; diff --git a/drivers/scsi/cxlflash/ocxl_hw.h b/drivers/scsi/cxlflash/ocxl_hw.h deleted file mode 100644 index f2fe88816bea..000000000000 --- a/drivers/scsi/cxlflash/ocxl_hw.h +++ /dev/null @@ -1,72 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * CXL Flash Device Driver - * - * Written by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com>, IBM Corporation - * Uma Krishnan <ukrishn@linux.vnet.ibm.com>, IBM Corporation - * - * Copyright (C) 2018 IBM Corporation - */ - -#define OCXL_MAX_IRQS 4 /* Max interrupts per process */ - -struct ocxlflash_irqs { - int hwirq; - u32 virq; - void __iomem *vtrig; -}; - -/* OCXL hardware AFU associated with the host */ -struct ocxl_hw_afu { - struct ocxlflash_context *ocxl_ctx; /* Host context */ - struct pci_dev *pdev; /* PCI device */ - struct device *dev; /* Generic device */ - bool perst_same_image; /* Same image loaded on perst */ - - struct ocxl_fn_config fcfg; /* DVSEC config of the function */ - struct ocxl_afu_config acfg; /* AFU configuration data */ - - int fn_actag_base; /* Function acTag base */ - int fn_actag_enabled; /* Function acTag number enabled */ - int afu_actag_base; /* AFU acTag base */ - int afu_actag_enabled; /* AFU acTag number enabled */ - - phys_addr_t ppmmio_phys; /* Per process MMIO space */ - phys_addr_t gmmio_phys; /* Global AFU MMIO space */ - void __iomem *gmmio_virt; /* Global MMIO map */ - - void *link_token; /* Link token for the SPA */ - struct idr idr; /* IDR to manage contexts */ - int max_pasid; /* Maximum number of contexts */ - bool is_present; /* Function has AFUs defined */ -}; - -enum ocxlflash_ctx_state { - CLOSED, - OPENED, - STARTED -}; - -struct ocxlflash_context { - struct ocxl_hw_afu *hw_afu; /* HW AFU back pointer */ - struct address_space *mapping; /* Mapping for pseudo filesystem */ - bool master; /* Whether this is a master context */ - int pe; /* Process element */ - - phys_addr_t psn_phys; /* Process mapping */ - u64 psn_size; /* Process mapping size */ - - spinlock_t slock; /* Protects irq/fault/event updates */ - wait_queue_head_t wq; /* Wait queue for poll and interrupts */ - struct mutex state_mutex; /* Mutex to update context state */ - enum ocxlflash_ctx_state state; /* Context state */ - - struct ocxlflash_irqs *irqs; /* Pointer to array of structures */ - int num_irqs; /* Number of interrupts */ - bool pending_irq; /* Pending interrupt on the context */ - ulong irq_bitmap; /* Bits indicating pending irq num */ - - u64 fault_addr; /* Address that triggered the fault */ - u64 fault_dsisr; /* Value of dsisr register at fault */ - bool pending_fault; /* Pending translation fault */ -}; diff --git a/drivers/scsi/cxlflash/sislite.h b/drivers/scsi/cxlflash/sislite.h deleted file mode 100644 index ab315c59505b..000000000000 --- a/drivers/scsi/cxlflash/sislite.h +++ /dev/null @@ -1,560 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * CXL Flash Device Driver - * - * Written by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>, IBM Corporation - * Matthew R. Ochs <mrochs@linux.vnet.ibm.com>, IBM Corporation - * - * Copyright (C) 2015 IBM Corporation - */ - -#ifndef _SISLITE_H -#define _SISLITE_H - -#include <linux/types.h> - -typedef u16 ctx_hndl_t; -typedef u32 res_hndl_t; - -#define SIZE_4K 4096 -#define SIZE_64K 65536 - -/* - * IOARCB: 64 bytes, min 16 byte alignment required, host native endianness - * except for SCSI CDB which remains big endian per SCSI standards. - */ -struct sisl_ioarcb { - u16 ctx_id; /* ctx_hndl_t */ - u16 req_flags; -#define SISL_REQ_FLAGS_RES_HNDL 0x8000U /* bit 0 (MSB) */ -#define SISL_REQ_FLAGS_PORT_LUN_ID 0x0000U - -#define SISL_REQ_FLAGS_SUP_UNDERRUN 0x4000U /* bit 1 */ - -#define SISL_REQ_FLAGS_TIMEOUT_SECS 0x0000U /* bits 8,9 */ -#define SISL_REQ_FLAGS_TIMEOUT_MSECS 0x0040U -#define SISL_REQ_FLAGS_TIMEOUT_USECS 0x0080U -#define SISL_REQ_FLAGS_TIMEOUT_CYCLES 0x00C0U - -#define SISL_REQ_FLAGS_TMF_CMD 0x0004u /* bit 13 */ - -#define SISL_REQ_FLAGS_AFU_CMD 0x0002U /* bit 14 */ - -#define SISL_REQ_FLAGS_HOST_WRITE 0x0001U /* bit 15 (LSB) */ -#define SISL_REQ_FLAGS_HOST_READ 0x0000U - - union { - u32 res_hndl; /* res_hndl_t */ - u32 port_sel; /* this is a selection mask: - * 0x1 -> port#0 can be selected, - * 0x2 -> port#1 can be selected. - * Can be bitwise ORed. - */ - }; - u64 lun_id; - u32 data_len; /* 4K for read/write */ - u32 ioadl_len; - union { - u64 data_ea; /* min 16 byte aligned */ - u64 ioadl_ea; - }; - u8 msi; /* LISN to send on RRQ write */ -#define SISL_MSI_CXL_PFAULT 0 /* reserved for CXL page faults */ -#define SISL_MSI_SYNC_ERROR 1 /* recommended for AFU sync error */ -#define SISL_MSI_RRQ_UPDATED 2 /* recommended for IO completion */ -#define SISL_MSI_ASYNC_ERROR 3 /* master only - for AFU async error */ - - u8 rrq; /* 0 for a single RRQ */ - u16 timeout; /* in units specified by req_flags */ - u32 rsvd1; - u8 cdb[16]; /* must be in big endian */ -#define SISL_AFU_CMD_SYNC 0xC0 /* AFU sync command */ -#define SISL_AFU_CMD_LUN_PROVISION 0xD0 /* AFU LUN provision command */ -#define SISL_AFU_CMD_DEBUG 0xE0 /* AFU debug command */ - -#define SISL_AFU_LUN_PROVISION_CREATE 0x00 /* LUN provision create type */ -#define SISL_AFU_LUN_PROVISION_DELETE 0x01 /* LUN provision delete type */ - - union { - u64 reserved; /* Reserved for IOARRIN mode */ - struct sisl_ioasa *ioasa; /* IOASA EA for SQ Mode */ - }; -} __packed; - -struct sisl_rc { - u8 flags; -#define SISL_RC_FLAGS_SENSE_VALID 0x80U -#define SISL_RC_FLAGS_FCP_RSP_CODE_VALID 0x40U -#define SISL_RC_FLAGS_OVERRUN 0x20U -#define SISL_RC_FLAGS_UNDERRUN 0x10U - - u8 afu_rc; -#define SISL_AFU_RC_RHT_INVALID 0x01U /* user error */ -#define SISL_AFU_RC_RHT_UNALIGNED 0x02U /* should never happen */ -#define SISL_AFU_RC_RHT_OUT_OF_BOUNDS 0x03u /* user error */ -#define SISL_AFU_RC_RHT_DMA_ERR 0x04u /* see afu_extra - * may retry if afu_retry is off - * possible on master exit - */ -#define SISL_AFU_RC_RHT_RW_PERM 0x05u /* no RW perms, user error */ -#define SISL_AFU_RC_LXT_UNALIGNED 0x12U /* should never happen */ -#define SISL_AFU_RC_LXT_OUT_OF_BOUNDS 0x13u /* user error */ -#define SISL_AFU_RC_LXT_DMA_ERR 0x14u /* see afu_extra - * may retry if afu_retry is off - * possible on master exit - */ -#define SISL_AFU_RC_LXT_RW_PERM 0x15u /* no RW perms, user error */ - -#define SISL_AFU_RC_NOT_XLATE_HOST 0x1au /* possible if master exited */ - - /* NO_CHANNELS means the FC ports selected by dest_port in - * IOARCB or in the LXT entry are down when the AFU tried to select - * a FC port. If the port went down on an active IO, it will set - * fc_rc to =0x54(NOLOGI) or 0x57(LINKDOWN) instead. - */ -#define SISL_AFU_RC_NO_CHANNELS 0x20U /* see afu_extra, may retry */ -#define SISL_AFU_RC_CAP_VIOLATION 0x21U /* either user error or - * afu reset/master restart - */ -#define SISL_AFU_RC_OUT_OF_DATA_BUFS 0x30U /* always retry */ -#define SISL_AFU_RC_DATA_DMA_ERR 0x31U /* see afu_extra - * may retry if afu_retry is off - */ - - u8 scsi_rc; /* SCSI status byte, retry as appropriate */ -#define SISL_SCSI_RC_CHECK 0x02U -#define SISL_SCSI_RC_BUSY 0x08u - - u8 fc_rc; /* retry */ - /* - * We should only see fc_rc=0x57 (LINKDOWN) or 0x54(NOLOGI) for - * commands that are in flight when a link goes down or is logged out. - * If the link is down or logged out before AFU selects the port, either - * it will choose the other port or we will get afu_rc=0x20 (no_channel) - * if there is no valid port to use. - * - * ABORTPEND/ABORTOK/ABORTFAIL/TGTABORT can be retried, typically these - * would happen if a frame is dropped and something times out. - * NOLOGI or LINKDOWN can be retried if the other port is up. - * RESIDERR can be retried as well. - * - * ABORTFAIL might indicate that lots of frames are getting CRC errors. - * So it maybe retried once and reset the link if it happens again. - * The link can also be reset on the CRC error threshold interrupt. - */ -#define SISL_FC_RC_ABORTPEND 0x52 /* exchange timeout or abort request */ -#define SISL_FC_RC_WRABORTPEND 0x53 /* due to write XFER_RDY invalid */ -#define SISL_FC_RC_NOLOGI 0x54 /* port not logged in, in-flight cmds */ -#define SISL_FC_RC_NOEXP 0x55 /* FC protocol error or HW bug */ -#define SISL_FC_RC_INUSE 0x56 /* tag already in use, HW bug */ -#define SISL_FC_RC_LINKDOWN 0x57 /* link down, in-flight cmds */ -#define SISL_FC_RC_ABORTOK 0x58 /* pending abort completed w/success */ -#define SISL_FC_RC_ABORTFAIL 0x59 /* pending abort completed w/fail */ -#define SISL_FC_RC_RESID 0x5A /* ioasa underrun/overrun flags set */ -#define SISL_FC_RC_RESIDERR 0x5B /* actual data len does not match SCSI - * reported len, possibly due to dropped - * frames - */ -#define SISL_FC_RC_TGTABORT 0x5C /* command aborted by target */ -}; - -#define SISL_SENSE_DATA_LEN 20 /* Sense data length */ -#define SISL_WWID_DATA_LEN 16 /* WWID data length */ - -/* - * IOASA: 64 bytes & must follow IOARCB, min 16 byte alignment required, - * host native endianness - */ -struct sisl_ioasa { - union { - struct sisl_rc rc; - u32 ioasc; -#define SISL_IOASC_GOOD_COMPLETION 0x00000000U - }; - - union { - u32 resid; - u32 lunid_hi; - }; - - u8 port; - u8 afu_extra; - /* when afu_rc=0x04, 0x14, 0x31 (_xxx_DMA_ERR): - * afu_exta contains PSL response code. Useful codes are: - */ -#define SISL_AFU_DMA_ERR_PAGE_IN 0x0A /* AFU_retry_on_pagein Action - * Enabled N/A - * Disabled retry - */ -#define SISL_AFU_DMA_ERR_INVALID_EA 0x0B /* this is a hard error - * afu_rc Implies - * 0x04, 0x14 master exit. - * 0x31 user error. - */ - /* when afu rc=0x20 (no channels): - * afu_extra bits [4:5]: available portmask, [6:7]: requested portmask. - */ -#define SISL_AFU_NO_CLANNELS_AMASK(afu_extra) (((afu_extra) & 0x0C) >> 2) -#define SISL_AFU_NO_CLANNELS_RMASK(afu_extra) ((afu_extra) & 0x03) - - u8 scsi_extra; - u8 fc_extra; - - union { - u8 sense_data[SISL_SENSE_DATA_LEN]; - struct { - u32 lunid_lo; - u8 wwid[SISL_WWID_DATA_LEN]; - }; - }; - - /* These fields are defined by the SISlite architecture for the - * host to use as they see fit for their implementation. - */ - union { - u64 host_use[4]; - u8 host_use_b[32]; - }; -} __packed; - -#define SISL_RESP_HANDLE_T_BIT 0x1ULL /* Toggle bit */ - -/* MMIO space is required to support only 64-bit access */ - -/* - * This AFU has two mechanisms to deal with endian-ness. - * One is a global configuration (in the afu_config) register - * below that specifies the endian-ness of the host. - * The other is a per context (i.e. application) specification - * controlled by the endian_ctrl field here. Since the master - * context is one such application the master context's - * endian-ness is set to be the same as the host. - * - * As per the SISlite spec, the MMIO registers are always - * big endian. - */ -#define SISL_ENDIAN_CTRL_BE 0x8000000000000080ULL -#define SISL_ENDIAN_CTRL_LE 0x0000000000000000ULL - -#ifdef __BIG_ENDIAN -#define SISL_ENDIAN_CTRL SISL_ENDIAN_CTRL_BE -#else -#define SISL_ENDIAN_CTRL SISL_ENDIAN_CTRL_LE -#endif - -/* per context host transport MMIO */ -struct sisl_host_map { - __be64 endian_ctrl; /* Per context Endian Control. The AFU will - * operate on whatever the context is of the - * host application. - */ - - __be64 intr_status; /* this sends LISN# programmed in ctx_ctrl. - * Only recovery in a PERM_ERR is a context - * exit since there is no way to tell which - * command caused the error. - */ -#define SISL_ISTATUS_PERM_ERR_LISN_3_EA 0x0400ULL /* b53, user error */ -#define SISL_ISTATUS_PERM_ERR_LISN_2_EA 0x0200ULL /* b54, user error */ -#define SISL_ISTATUS_PERM_ERR_LISN_1_EA 0x0100ULL /* b55, user error */ -#define SISL_ISTATUS_PERM_ERR_LISN_3_PASID 0x0080ULL /* b56, user error */ -#define SISL_ISTATUS_PERM_ERR_LISN_2_PASID 0x0040ULL /* b57, user error */ -#define SISL_ISTATUS_PERM_ERR_LISN_1_PASID 0x0020ULL /* b58, user error */ -#define SISL_ISTATUS_PERM_ERR_CMDROOM 0x0010ULL /* b59, user error */ -#define SISL_ISTATUS_PERM_ERR_RCB_READ 0x0008ULL /* b60, user error */ -#define SISL_ISTATUS_PERM_ERR_SA_WRITE 0x0004ULL /* b61, user error */ -#define SISL_ISTATUS_PERM_ERR_RRQ_WRITE 0x0002ULL /* b62, user error */ - /* Page in wait accessing RCB/IOASA/RRQ is reported in b63. - * Same error in data/LXT/RHT access is reported via IOASA. - */ -#define SISL_ISTATUS_TEMP_ERR_PAGEIN 0x0001ULL /* b63, can only be - * generated when AFU - * auto retry is - * disabled. If user - * can determine the - * command that caused - * the error, it can - * be retried. - */ -#define SISL_ISTATUS_UNMASK (0x07FFULL) /* 1 means unmasked */ -#define SISL_ISTATUS_MASK ~(SISL_ISTATUS_UNMASK) /* 1 means masked */ - - __be64 intr_clear; - __be64 intr_mask; - __be64 ioarrin; /* only write what cmd_room permits */ - __be64 rrq_start; /* start & end are both inclusive */ - __be64 rrq_end; /* write sequence: start followed by end */ - __be64 cmd_room; - __be64 ctx_ctrl; /* least significant byte or b56:63 is LISN# */ -#define SISL_CTX_CTRL_UNMAP_SECTOR 0x8000000000000000ULL /* b0 */ -#define SISL_CTX_CTRL_LISN_MASK (0xFFULL) - __be64 mbox_w; /* restricted use */ - __be64 sq_start; /* Submission Queue (R/W): write sequence and */ - __be64 sq_end; /* inclusion semantics are the same as RRQ */ - __be64 sq_head; /* Submission Queue Head (R): for debugging */ - __be64 sq_tail; /* Submission Queue TAIL (R/W): next IOARCB */ - __be64 sq_ctx_reset; /* Submission Queue Context Reset (R/W) */ -}; - -/* per context provisioning & control MMIO */ -struct sisl_ctrl_map { - __be64 rht_start; - __be64 rht_cnt_id; - /* both cnt & ctx_id args must be ULL */ -#define SISL_RHT_CNT_ID(cnt, ctx_id) (((cnt) << 48) | ((ctx_id) << 32)) - - __be64 ctx_cap; /* afu_rc below is when the capability is violated */ -#define SISL_CTX_CAP_PROXY_ISSUE 0x8000000000000000ULL /* afu_rc 0x21 */ -#define SISL_CTX_CAP_REAL_MODE 0x4000000000000000ULL /* afu_rc 0x21 */ -#define SISL_CTX_CAP_HOST_XLATE 0x2000000000000000ULL /* afu_rc 0x1a */ -#define SISL_CTX_CAP_PROXY_TARGET 0x1000000000000000ULL /* afu_rc 0x21 */ -#define SISL_CTX_CAP_AFU_CMD 0x0000000000000008ULL /* afu_rc 0x21 */ -#define SISL_CTX_CAP_GSCSI_CMD 0x0000000000000004ULL /* afu_rc 0x21 */ -#define SISL_CTX_CAP_WRITE_CMD 0x0000000000000002ULL /* afu_rc 0x21 */ -#define SISL_CTX_CAP_READ_CMD 0x0000000000000001ULL /* afu_rc 0x21 */ - __be64 mbox_r; - __be64 lisn_pasid[2]; - /* pasid _a arg must be ULL */ -#define SISL_LISN_PASID(_a, _b) (((_a) << 32) | (_b)) - __be64 lisn_ea[3]; -}; - -/* single copy global regs */ -struct sisl_global_regs { - __be64 aintr_status; - /* - * In cxlflash, FC port/link are arranged in port pairs, each - * gets a byte of status: - * - * *_OTHER: other err, FC_ERRCAP[31:20] - * *_LOGO: target sent FLOGI/PLOGI/LOGO while logged in - * *_CRC_T: CRC threshold exceeded - * *_LOGI_R: login state machine timed out and retrying - * *_LOGI_F: login failed, FC_ERROR[19:0] - * *_LOGI_S: login succeeded - * *_LINK_DN: link online to offline - * *_LINK_UP: link offline to online - */ -#define SISL_ASTATUS_FC2_OTHER 0x80000000ULL /* b32 */ -#define SISL_ASTATUS_FC2_LOGO 0x40000000ULL /* b33 */ -#define SISL_ASTATUS_FC2_CRC_T 0x20000000ULL /* b34 */ -#define SISL_ASTATUS_FC2_LOGI_R 0x10000000ULL /* b35 */ -#define SISL_ASTATUS_FC2_LOGI_F 0x08000000ULL /* b36 */ -#define SISL_ASTATUS_FC2_LOGI_S 0x04000000ULL /* b37 */ -#define SISL_ASTATUS_FC2_LINK_DN 0x02000000ULL /* b38 */ -#define SISL_ASTATUS_FC2_LINK_UP 0x01000000ULL /* b39 */ - -#define SISL_ASTATUS_FC3_OTHER 0x00800000ULL /* b40 */ -#define SISL_ASTATUS_FC3_LOGO 0x00400000ULL /* b41 */ -#define SISL_ASTATUS_FC3_CRC_T 0x00200000ULL /* b42 */ -#define SISL_ASTATUS_FC3_LOGI_R 0x00100000ULL /* b43 */ -#define SISL_ASTATUS_FC3_LOGI_F 0x00080000ULL /* b44 */ -#define SISL_ASTATUS_FC3_LOGI_S 0x00040000ULL /* b45 */ -#define SISL_ASTATUS_FC3_LINK_DN 0x00020000ULL /* b46 */ -#define SISL_ASTATUS_FC3_LINK_UP 0x00010000ULL /* b47 */ - -#define SISL_ASTATUS_FC0_OTHER 0x00008000ULL /* b48 */ -#define SISL_ASTATUS_FC0_LOGO 0x00004000ULL /* b49 */ -#define SISL_ASTATUS_FC0_CRC_T 0x00002000ULL /* b50 */ -#define SISL_ASTATUS_FC0_LOGI_R 0x00001000ULL /* b51 */ -#define SISL_ASTATUS_FC0_LOGI_F 0x00000800ULL /* b52 */ -#define SISL_ASTATUS_FC0_LOGI_S 0x00000400ULL /* b53 */ -#define SISL_ASTATUS_FC0_LINK_DN 0x00000200ULL /* b54 */ -#define SISL_ASTATUS_FC0_LINK_UP 0x00000100ULL /* b55 */ - -#define SISL_ASTATUS_FC1_OTHER 0x00000080ULL /* b56 */ -#define SISL_ASTATUS_FC1_LOGO 0x00000040ULL /* b57 */ -#define SISL_ASTATUS_FC1_CRC_T 0x00000020ULL /* b58 */ -#define SISL_ASTATUS_FC1_LOGI_R 0x00000010ULL /* b59 */ -#define SISL_ASTATUS_FC1_LOGI_F 0x00000008ULL /* b60 */ -#define SISL_ASTATUS_FC1_LOGI_S 0x00000004ULL /* b61 */ -#define SISL_ASTATUS_FC1_LINK_DN 0x00000002ULL /* b62 */ -#define SISL_ASTATUS_FC1_LINK_UP 0x00000001ULL /* b63 */ - -#define SISL_FC_INTERNAL_UNMASK 0x0000000300000000ULL /* 1 means unmasked */ -#define SISL_FC_INTERNAL_MASK ~(SISL_FC_INTERNAL_UNMASK) -#define SISL_FC_INTERNAL_SHIFT 32 - -#define SISL_FC_SHUTDOWN_NORMAL 0x0000000000000010ULL -#define SISL_FC_SHUTDOWN_ABRUPT 0x0000000000000020ULL - -#define SISL_STATUS_SHUTDOWN_ACTIVE 0x0000000000000010ULL -#define SISL_STATUS_SHUTDOWN_COMPLETE 0x0000000000000020ULL - -#define SISL_ASTATUS_UNMASK 0xFFFFFFFFULL /* 1 means unmasked */ -#define SISL_ASTATUS_MASK ~(SISL_ASTATUS_UNMASK) /* 1 means masked */ - - __be64 aintr_clear; - __be64 aintr_mask; - __be64 afu_ctrl; - __be64 afu_hb; - __be64 afu_scratch_pad; - __be64 afu_port_sel; -#define SISL_AFUCONF_AR_IOARCB 0x4000ULL -#define SISL_AFUCONF_AR_LXT 0x2000ULL -#define SISL_AFUCONF_AR_RHT 0x1000ULL -#define SISL_AFUCONF_AR_DATA 0x0800ULL -#define SISL_AFUCONF_AR_RSRC 0x0400ULL -#define SISL_AFUCONF_AR_IOASA 0x0200ULL -#define SISL_AFUCONF_AR_RRQ 0x0100ULL -/* Aggregate all Auto Retry Bits */ -#define SISL_AFUCONF_AR_ALL (SISL_AFUCONF_AR_IOARCB|SISL_AFUCONF_AR_LXT| \ - SISL_AFUCONF_AR_RHT|SISL_AFUCONF_AR_DATA| \ - SISL_AFUCONF_AR_RSRC|SISL_AFUCONF_AR_IOASA| \ - SISL_AFUCONF_AR_RRQ) -#ifdef __BIG_ENDIAN -#define SISL_AFUCONF_ENDIAN 0x0000ULL -#else -#define SISL_AFUCONF_ENDIAN 0x0020ULL -#endif -#define SISL_AFUCONF_MBOX_CLR_READ 0x0010ULL - __be64 afu_config; - __be64 rsvd[0xf8]; - __le64 afu_version; - __be64 interface_version; -#define SISL_INTVER_CAP_SHIFT 16 -#define SISL_INTVER_MAJ_SHIFT 8 -#define SISL_INTVER_CAP_MASK 0xFFFFFFFF00000000ULL -#define SISL_INTVER_MAJ_MASK 0x00000000FFFF0000ULL -#define SISL_INTVER_MIN_MASK 0x000000000000FFFFULL -#define SISL_INTVER_CAP_IOARRIN_CMD_MODE 0x800000000000ULL -#define SISL_INTVER_CAP_SQ_CMD_MODE 0x400000000000ULL -#define SISL_INTVER_CAP_RESERVED_CMD_MODE_A 0x200000000000ULL -#define SISL_INTVER_CAP_RESERVED_CMD_MODE_B 0x100000000000ULL -#define SISL_INTVER_CAP_LUN_PROVISION 0x080000000000ULL -#define SISL_INTVER_CAP_AFU_DEBUG 0x040000000000ULL -#define SISL_INTVER_CAP_OCXL_LISN 0x020000000000ULL -}; - -#define CXLFLASH_NUM_FC_PORTS_PER_BANK 2 /* fixed # of ports per bank */ -#define CXLFLASH_MAX_FC_BANKS 2 /* max # of banks supported */ -#define CXLFLASH_MAX_FC_PORTS (CXLFLASH_NUM_FC_PORTS_PER_BANK * \ - CXLFLASH_MAX_FC_BANKS) -#define CXLFLASH_MAX_CONTEXT 512 /* number of contexts per AFU */ -#define CXLFLASH_NUM_VLUNS 512 /* number of vluns per AFU/port */ -#define CXLFLASH_NUM_REGS 512 /* number of registers per port */ - -struct fc_port_bank { - __be64 fc_port_regs[CXLFLASH_NUM_FC_PORTS_PER_BANK][CXLFLASH_NUM_REGS]; - __be64 fc_port_luns[CXLFLASH_NUM_FC_PORTS_PER_BANK][CXLFLASH_NUM_VLUNS]; -}; - -struct sisl_global_map { - union { - struct sisl_global_regs regs; - char page0[SIZE_4K]; /* page 0 */ - }; - - char page1[SIZE_4K]; /* page 1 */ - - struct fc_port_bank bank[CXLFLASH_MAX_FC_BANKS]; /* pages 2 - 9 */ - - /* pages 10 - 15 are reserved */ - -}; - -/* - * CXL Flash Memory Map - * - * +-------------------------------+ - * | 512 * 64 KB User MMIO | - * | (per context) | - * | User Accessible | - * +-------------------------------+ - * | 512 * 128 B per context | - * | Provisioning and Control | - * | Trusted Process accessible | - * +-------------------------------+ - * | 64 KB Global | - * | Trusted Process accessible | - * +-------------------------------+ - */ -struct cxlflash_afu_map { - union { - struct sisl_host_map host; - char harea[SIZE_64K]; /* 64KB each */ - } hosts[CXLFLASH_MAX_CONTEXT]; - - union { - struct sisl_ctrl_map ctrl; - char carea[cache_line_size()]; /* 128B each */ - } ctrls[CXLFLASH_MAX_CONTEXT]; - - union { - struct sisl_global_map global; - char garea[SIZE_64K]; /* 64KB single block */ - }; -}; - -/* - * LXT - LBA Translation Table - * LXT control blocks - */ -struct sisl_lxt_entry { - u64 rlba_base; /* bits 0:47 is base - * b48:55 is lun index - * b58:59 is write & read perms - * (if no perm, afu_rc=0x15) - * b60:63 is port_sel mask - */ -}; - -/* - * RHT - Resource Handle Table - * Per the SISlite spec, RHT entries are to be 16-byte aligned - */ -struct sisl_rht_entry { - struct sisl_lxt_entry *lxt_start; - u32 lxt_cnt; - u16 rsvd; - u8 fp; /* format & perm nibbles. - * (if no perm, afu_rc=0x05) - */ - u8 nmask; -} __packed __aligned(16); - -struct sisl_rht_entry_f1 { - u64 lun_id; - union { - struct { - u8 valid; - u8 rsvd[5]; - u8 fp; - u8 port_sel; - }; - - u64 dw; - }; -} __packed __aligned(16); - -/* make the fp byte */ -#define SISL_RHT_FP(fmt, perm) (((fmt) << 4) | (perm)) - -/* make the fp byte for a clone from a source fp and clone flags - * flags must be only 2 LSB bits. - */ -#define SISL_RHT_FP_CLONE(src_fp, cln_flags) ((src_fp) & (0xFC | (cln_flags))) - -#define RHT_PERM_READ 0x01U -#define RHT_PERM_WRITE 0x02U -#define RHT_PERM_RW (RHT_PERM_READ | RHT_PERM_WRITE) - -/* extract the perm bits from a fp */ -#define SISL_RHT_PERM(fp) ((fp) & RHT_PERM_RW) - -#define PORT0 0x01U -#define PORT1 0x02U -#define PORT2 0x04U -#define PORT3 0x08U -#define PORT_MASK(_n) ((1 << (_n)) - 1) - -/* AFU Sync Mode byte */ -#define AFU_LW_SYNC 0x0U -#define AFU_HW_SYNC 0x1U -#define AFU_GSYNC 0x2U - -/* Special Task Management Function CDB */ -#define TMF_LUN_RESET 0x1U -#define TMF_CLEAR_ACA 0x2U - -#endif /* _SISLITE_H */ diff --git a/drivers/scsi/cxlflash/superpipe.c b/drivers/scsi/cxlflash/superpipe.c deleted file mode 100644 index 97631f48e19d..000000000000 --- a/drivers/scsi/cxlflash/superpipe.c +++ /dev/null @@ -1,2218 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * CXL Flash Device Driver - * - * Written by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>, IBM Corporation - * Matthew R. Ochs <mrochs@linux.vnet.ibm.com>, IBM Corporation - * - * Copyright (C) 2015 IBM Corporation - */ - -#include <linux/delay.h> -#include <linux/file.h> -#include <linux/interrupt.h> -#include <linux/pci.h> -#include <linux/syscalls.h> -#include <linux/unaligned.h> - -#include <scsi/scsi.h> -#include <scsi/scsi_host.h> -#include <scsi/scsi_cmnd.h> -#include <scsi/scsi_eh.h> -#include <uapi/scsi/cxlflash_ioctl.h> - -#include "sislite.h" -#include "common.h" -#include "vlun.h" -#include "superpipe.h" - -struct cxlflash_global global; - -/** - * marshal_rele_to_resize() - translate release to resize structure - * @release: Source structure from which to translate/copy. - * @resize: Destination structure for the translate/copy. - */ -static void marshal_rele_to_resize(struct dk_cxlflash_release *release, - struct dk_cxlflash_resize *resize) -{ - resize->hdr = release->hdr; - resize->context_id = release->context_id; - resize->rsrc_handle = release->rsrc_handle; -} - -/** - * marshal_det_to_rele() - translate detach to release structure - * @detach: Destination structure for the translate/copy. - * @release: Source structure from which to translate/copy. - */ -static void marshal_det_to_rele(struct dk_cxlflash_detach *detach, - struct dk_cxlflash_release *release) -{ - release->hdr = detach->hdr; - release->context_id = detach->context_id; -} - -/** - * marshal_udir_to_rele() - translate udirect to release structure - * @udirect: Source structure from which to translate/copy. - * @release: Destination structure for the translate/copy. - */ -static void marshal_udir_to_rele(struct dk_cxlflash_udirect *udirect, - struct dk_cxlflash_release *release) -{ - release->hdr = udirect->hdr; - release->context_id = udirect->context_id; - release->rsrc_handle = udirect->rsrc_handle; -} - -/** - * cxlflash_free_errpage() - frees resources associated with global error page - */ -void cxlflash_free_errpage(void) -{ - - mutex_lock(&global.mutex); - if (global.err_page) { - __free_page(global.err_page); - global.err_page = NULL; - } - mutex_unlock(&global.mutex); -} - -/** - * cxlflash_stop_term_user_contexts() - stops/terminates known user contexts - * @cfg: Internal structure associated with the host. - * - * When the host needs to go down, all users must be quiesced and their - * memory freed. This is accomplished by putting the contexts in error - * state which will notify the user and let them 'drive' the tear down. - * Meanwhile, this routine camps until all user contexts have been removed. - * - * Note that the main loop in this routine will always execute at least once - * to flush the reset_waitq. - */ -void cxlflash_stop_term_user_contexts(struct cxlflash_cfg *cfg) -{ - struct device *dev = &cfg->dev->dev; - int i, found = true; - - cxlflash_mark_contexts_error(cfg); - - while (true) { - for (i = 0; i < MAX_CONTEXT; i++) - if (cfg->ctx_tbl[i]) { - found = true; - break; - } - - if (!found && list_empty(&cfg->ctx_err_recovery)) - return; - - dev_dbg(dev, "%s: Wait for user contexts to quiesce...\n", - __func__); - wake_up_all(&cfg->reset_waitq); - ssleep(1); - found = false; - } -} - -/** - * find_error_context() - locates a context by cookie on the error recovery list - * @cfg: Internal structure associated with the host. - * @rctxid: Desired context by id. - * @file: Desired context by file. - * - * Return: Found context on success, NULL on failure - */ -static struct ctx_info *find_error_context(struct cxlflash_cfg *cfg, u64 rctxid, - struct file *file) -{ - struct ctx_info *ctxi; - - list_for_each_entry(ctxi, &cfg->ctx_err_recovery, list) - if ((ctxi->ctxid == rctxid) || (ctxi->file == file)) - return ctxi; - - return NULL; -} - -/** - * get_context() - obtains a validated and locked context reference - * @cfg: Internal structure associated with the host. - * @rctxid: Desired context (raw, un-decoded format). - * @arg: LUN information or file associated with request. - * @ctx_ctrl: Control information to 'steer' desired lookup. - * - * NOTE: despite the name pid, in linux, current->pid actually refers - * to the lightweight process id (tid) and can change if the process is - * multi threaded. The tgid remains constant for the process and only changes - * when the process of fork. For all intents and purposes, think of tgid - * as a pid in the traditional sense. - * - * Return: Validated context on success, NULL on failure - */ -struct ctx_info *get_context(struct cxlflash_cfg *cfg, u64 rctxid, - void *arg, enum ctx_ctrl ctx_ctrl) -{ - struct device *dev = &cfg->dev->dev; - struct ctx_info *ctxi = NULL; - struct lun_access *lun_access = NULL; - struct file *file = NULL; - struct llun_info *lli = arg; - u64 ctxid = DECODE_CTXID(rctxid); - int rc; - pid_t pid = task_tgid_nr(current), ctxpid = 0; - - if (ctx_ctrl & CTX_CTRL_FILE) { - lli = NULL; - file = (struct file *)arg; - } - - if (ctx_ctrl & CTX_CTRL_CLONE) - pid = task_ppid_nr(current); - - if (likely(ctxid < MAX_CONTEXT)) { - while (true) { - mutex_lock(&cfg->ctx_tbl_list_mutex); - ctxi = cfg->ctx_tbl[ctxid]; - if (ctxi) - if ((file && (ctxi->file != file)) || - (!file && (ctxi->ctxid != rctxid))) - ctxi = NULL; - - if ((ctx_ctrl & CTX_CTRL_ERR) || - (!ctxi && (ctx_ctrl & CTX_CTRL_ERR_FALLBACK))) - ctxi = find_error_context(cfg, rctxid, file); - if (!ctxi) { - mutex_unlock(&cfg->ctx_tbl_list_mutex); - goto out; - } - - /* - * Need to acquire ownership of the context while still - * under the table/list lock to serialize with a remove - * thread. Use the 'try' to avoid stalling the - * table/list lock for a single context. - * - * Note that the lock order is: - * - * cfg->ctx_tbl_list_mutex -> ctxi->mutex - * - * Therefore release ctx_tbl_list_mutex before retrying. - */ - rc = mutex_trylock(&ctxi->mutex); - mutex_unlock(&cfg->ctx_tbl_list_mutex); - if (rc) - break; /* got the context's lock! */ - } - - if (ctxi->unavail) - goto denied; - - ctxpid = ctxi->pid; - if (likely(!(ctx_ctrl & CTX_CTRL_NOPID))) - if (pid != ctxpid) - goto denied; - - if (lli) { - list_for_each_entry(lun_access, &ctxi->luns, list) - if (lun_access->lli == lli) - goto out; - goto denied; - } - } - -out: - dev_dbg(dev, "%s: rctxid=%016llx ctxinfo=%p ctxpid=%u pid=%u " - "ctx_ctrl=%u\n", __func__, rctxid, ctxi, ctxpid, pid, - ctx_ctrl); - - return ctxi; - -denied: - mutex_unlock(&ctxi->mutex); - ctxi = NULL; - goto out; -} - -/** - * put_context() - release a context that was retrieved from get_context() - * @ctxi: Context to release. - * - * For now, releasing the context equates to unlocking it's mutex. - */ -void put_context(struct ctx_info *ctxi) -{ - mutex_unlock(&ctxi->mutex); -} - -/** - * afu_attach() - attach a context to the AFU - * @cfg: Internal structure associated with the host. - * @ctxi: Context to attach. - * - * Upon setting the context capabilities, they must be confirmed with - * a read back operation as the context might have been closed since - * the mailbox was unlocked. When this occurs, registration is failed. - * - * Return: 0 on success, -errno on failure - */ -static int afu_attach(struct cxlflash_cfg *cfg, struct ctx_info *ctxi) -{ - struct device *dev = &cfg->dev->dev; - struct afu *afu = cfg->afu; - struct sisl_ctrl_map __iomem *ctrl_map = ctxi->ctrl_map; - int rc = 0; - struct hwq *hwq = get_hwq(afu, PRIMARY_HWQ); - u64 val; - int i; - - /* Unlock cap and restrict user to read/write cmds in translated mode */ - readq_be(&ctrl_map->mbox_r); - val = (SISL_CTX_CAP_READ_CMD | SISL_CTX_CAP_WRITE_CMD); - writeq_be(val, &ctrl_map->ctx_cap); - val = readq_be(&ctrl_map->ctx_cap); - if (val != (SISL_CTX_CAP_READ_CMD | SISL_CTX_CAP_WRITE_CMD)) { - dev_err(dev, "%s: ctx may be closed val=%016llx\n", - __func__, val); - rc = -EAGAIN; - goto out; - } - - if (afu_is_ocxl_lisn(afu)) { - /* Set up the LISN effective address for each interrupt */ - for (i = 0; i < ctxi->irqs; i++) { - val = cfg->ops->get_irq_objhndl(ctxi->ctx, i); - writeq_be(val, &ctrl_map->lisn_ea[i]); - } - - /* Use primary HWQ PASID as identifier for all interrupts */ - val = hwq->ctx_hndl; - writeq_be(SISL_LISN_PASID(val, val), &ctrl_map->lisn_pasid[0]); - writeq_be(SISL_LISN_PASID(0UL, val), &ctrl_map->lisn_pasid[1]); - } - - /* Set up MMIO registers pointing to the RHT */ - writeq_be((u64)ctxi->rht_start, &ctrl_map->rht_start); - val = SISL_RHT_CNT_ID((u64)MAX_RHT_PER_CONTEXT, (u64)(hwq->ctx_hndl)); - writeq_be(val, &ctrl_map->rht_cnt_id); -out: - dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc); - return rc; -} - -/** - * read_cap16() - issues a SCSI READ_CAP16 command - * @sdev: SCSI device associated with LUN. - * @lli: LUN destined for capacity request. - * - * The READ_CAP16 can take quite a while to complete. Should an EEH occur while - * in scsi_execute_cmd(), the EEH handler will attempt to recover. As part of - * the recovery, the handler drains all currently running ioctls, waiting until - * they have completed before proceeding with a reset. As this routine is used - * on the ioctl path, this can create a condition where the EEH handler becomes - * stuck, infinitely waiting for this ioctl thread. To avoid this behavior, - * temporarily unmark this thread as an ioctl thread by releasing the ioctl - * read semaphore. This will allow the EEH handler to proceed with a recovery - * while this thread is still running. Once the scsi_execute_cmd() returns, - * reacquire the ioctl read semaphore and check the adapter state in case it - * changed while inside of scsi_execute_cmd(). The state check will wait if the - * adapter is still being recovered or return a failure if the recovery failed. - * In the event that the adapter reset failed, simply return the failure as the - * ioctl would be unable to continue. - * - * Note that the above puts a requirement on this routine to only be called on - * an ioctl thread. - * - * Return: 0 on success, -errno on failure - */ -static int read_cap16(struct scsi_device *sdev, struct llun_info *lli) -{ - struct cxlflash_cfg *cfg = shost_priv(sdev->host); - struct device *dev = &cfg->dev->dev; - struct glun_info *gli = lli->parent; - struct scsi_sense_hdr sshdr; - const struct scsi_exec_args exec_args = { - .sshdr = &sshdr, - }; - u8 *cmd_buf = NULL; - u8 *scsi_cmd = NULL; - int rc = 0; - int result = 0; - int retry_cnt = 0; - u32 to = CMD_TIMEOUT * HZ; - -retry: - cmd_buf = kzalloc(CMD_BUFSIZE, GFP_KERNEL); - scsi_cmd = kzalloc(MAX_COMMAND_SIZE, GFP_KERNEL); - if (unlikely(!cmd_buf || !scsi_cmd)) { - rc = -ENOMEM; - goto out; - } - - scsi_cmd[0] = SERVICE_ACTION_IN_16; /* read cap(16) */ - scsi_cmd[1] = SAI_READ_CAPACITY_16; /* service action */ - put_unaligned_be32(CMD_BUFSIZE, &scsi_cmd[10]); - - dev_dbg(dev, "%s: %ssending cmd(%02x)\n", __func__, - retry_cnt ? "re" : "", scsi_cmd[0]); - - /* Drop the ioctl read semaphore across lengthy call */ - up_read(&cfg->ioctl_rwsem); - result = scsi_execute_cmd(sdev, scsi_cmd, REQ_OP_DRV_IN, cmd_buf, - CMD_BUFSIZE, to, CMD_RETRIES, &exec_args); - down_read(&cfg->ioctl_rwsem); - rc = check_state(cfg); - if (rc) { - dev_err(dev, "%s: Failed state result=%08x\n", - __func__, result); - rc = -ENODEV; - goto out; - } - - if (result > 0 && scsi_sense_valid(&sshdr)) { - if (result & SAM_STAT_CHECK_CONDITION) { - switch (sshdr.sense_key) { - case NO_SENSE: - case RECOVERED_ERROR: - case NOT_READY: - result &= ~SAM_STAT_CHECK_CONDITION; - break; - case UNIT_ATTENTION: - switch (sshdr.asc) { - case 0x29: /* Power on Reset or Device Reset */ - fallthrough; - case 0x2A: /* Device capacity changed */ - case 0x3F: /* Report LUNs changed */ - /* Retry the command once more */ - if (retry_cnt++ < 1) { - kfree(cmd_buf); - kfree(scsi_cmd); - goto retry; - } - } - break; - default: - break; - } - } - } - - if (result) { - dev_err(dev, "%s: command failed, result=%08x\n", - __func__, result); - rc = -EIO; - goto out; - } - - /* - * Read cap was successful, grab values from the buffer; - * note that we don't need to worry about unaligned access - * as the buffer is allocated on an aligned boundary. - */ - mutex_lock(&gli->mutex); - gli->max_lba = be64_to_cpu(*((__be64 *)&cmd_buf[0])); - gli->blk_len = be32_to_cpu(*((__be32 *)&cmd_buf[8])); - mutex_unlock(&gli->mutex); - -out: - kfree(cmd_buf); - kfree(scsi_cmd); - - dev_dbg(dev, "%s: maxlba=%lld blklen=%d rc=%d\n", - __func__, gli->max_lba, gli->blk_len, rc); - return rc; -} - -/** - * get_rhte() - obtains validated resource handle table entry reference - * @ctxi: Context owning the resource handle. - * @rhndl: Resource handle associated with entry. - * @lli: LUN associated with request. - * - * Return: Validated RHTE on success, NULL on failure - */ -struct sisl_rht_entry *get_rhte(struct ctx_info *ctxi, res_hndl_t rhndl, - struct llun_info *lli) -{ - struct cxlflash_cfg *cfg = ctxi->cfg; - struct device *dev = &cfg->dev->dev; - struct sisl_rht_entry *rhte = NULL; - - if (unlikely(!ctxi->rht_start)) { - dev_dbg(dev, "%s: Context does not have allocated RHT\n", - __func__); - goto out; - } - - if (unlikely(rhndl >= MAX_RHT_PER_CONTEXT)) { - dev_dbg(dev, "%s: Bad resource handle rhndl=%d\n", - __func__, rhndl); - goto out; - } - - if (unlikely(ctxi->rht_lun[rhndl] != lli)) { - dev_dbg(dev, "%s: Bad resource handle LUN rhndl=%d\n", - __func__, rhndl); - goto out; - } - - rhte = &ctxi->rht_start[rhndl]; - if (unlikely(rhte->nmask == 0)) { - dev_dbg(dev, "%s: Unopened resource handle rhndl=%d\n", - __func__, rhndl); - rhte = NULL; - goto out; - } - -out: - return rhte; -} - -/** - * rhte_checkout() - obtains free/empty resource handle table entry - * @ctxi: Context owning the resource handle. - * @lli: LUN associated with request. - * - * Return: Free RHTE on success, NULL on failure - */ -struct sisl_rht_entry *rhte_checkout(struct ctx_info *ctxi, - struct llun_info *lli) -{ - struct cxlflash_cfg *cfg = ctxi->cfg; - struct device *dev = &cfg->dev->dev; - struct sisl_rht_entry *rhte = NULL; - int i; - - /* Find a free RHT entry */ - for (i = 0; i < MAX_RHT_PER_CONTEXT; i++) - if (ctxi->rht_start[i].nmask == 0) { - rhte = &ctxi->rht_start[i]; - ctxi->rht_out++; - break; - } - - if (likely(rhte)) - ctxi->rht_lun[i] = lli; - - dev_dbg(dev, "%s: returning rhte=%p index=%d\n", __func__, rhte, i); - return rhte; -} - -/** - * rhte_checkin() - releases a resource handle table entry - * @ctxi: Context owning the resource handle. - * @rhte: RHTE to release. - */ -void rhte_checkin(struct ctx_info *ctxi, - struct sisl_rht_entry *rhte) -{ - u32 rsrc_handle = rhte - ctxi->rht_start; - - rhte->nmask = 0; - rhte->fp = 0; - ctxi->rht_out--; - ctxi->rht_lun[rsrc_handle] = NULL; - ctxi->rht_needs_ws[rsrc_handle] = false; -} - -/** - * rht_format1() - populates a RHTE for format 1 - * @rhte: RHTE to populate. - * @lun_id: LUN ID of LUN associated with RHTE. - * @perm: Desired permissions for RHTE. - * @port_sel: Port selection mask - */ -static void rht_format1(struct sisl_rht_entry *rhte, u64 lun_id, u32 perm, - u32 port_sel) -{ - /* - * Populate the Format 1 RHT entry for direct access (physical - * LUN) using the synchronization sequence defined in the - * SISLite specification. - */ - struct sisl_rht_entry_f1 dummy = { 0 }; - struct sisl_rht_entry_f1 *rhte_f1 = (struct sisl_rht_entry_f1 *)rhte; - - memset(rhte_f1, 0, sizeof(*rhte_f1)); - rhte_f1->fp = SISL_RHT_FP(1U, 0); - dma_wmb(); /* Make setting of format bit visible */ - - rhte_f1->lun_id = lun_id; - dma_wmb(); /* Make setting of LUN id visible */ - - /* - * Use a dummy RHT Format 1 entry to build the second dword - * of the entry that must be populated in a single write when - * enabled (valid bit set to TRUE). - */ - dummy.valid = 0x80; - dummy.fp = SISL_RHT_FP(1U, perm); - dummy.port_sel = port_sel; - rhte_f1->dw = dummy.dw; - - dma_wmb(); /* Make remaining RHT entry fields visible */ -} - -/** - * cxlflash_lun_attach() - attaches a user to a LUN and manages the LUN's mode - * @gli: LUN to attach. - * @mode: Desired mode of the LUN. - * @locked: Mutex status on current thread. - * - * Return: 0 on success, -errno on failure - */ -int cxlflash_lun_attach(struct glun_info *gli, enum lun_mode mode, bool locked) -{ - int rc = 0; - - if (!locked) - mutex_lock(&gli->mutex); - - if (gli->mode == MODE_NONE) - gli->mode = mode; - else if (gli->mode != mode) { - pr_debug("%s: gli_mode=%d requested_mode=%d\n", - __func__, gli->mode, mode); - rc = -EINVAL; - goto out; - } - - gli->users++; - WARN_ON(gli->users <= 0); -out: - pr_debug("%s: Returning rc=%d gli->mode=%u gli->users=%u\n", - __func__, rc, gli->mode, gli->users); - if (!locked) - mutex_unlock(&gli->mutex); - return rc; -} - -/** - * cxlflash_lun_detach() - detaches a user from a LUN and resets the LUN's mode - * @gli: LUN to detach. - * - * When resetting the mode, terminate block allocation resources as they - * are no longer required (service is safe to call even when block allocation - * resources were not present - such as when transitioning from physical mode). - * These resources will be reallocated when needed (subsequent transition to - * virtual mode). - */ -void cxlflash_lun_detach(struct glun_info *gli) -{ - mutex_lock(&gli->mutex); - WARN_ON(gli->mode == MODE_NONE); - if (--gli->users == 0) { - gli->mode = MODE_NONE; - cxlflash_ba_terminate(&gli->blka.ba_lun); - } - pr_debug("%s: gli->users=%u\n", __func__, gli->users); - WARN_ON(gli->users < 0); - mutex_unlock(&gli->mutex); -} - -/** - * _cxlflash_disk_release() - releases the specified resource entry - * @sdev: SCSI device associated with LUN. - * @ctxi: Context owning resources. - * @release: Release ioctl data structure. - * - * For LUNs in virtual mode, the virtual LUN associated with the specified - * resource handle is resized to 0 prior to releasing the RHTE. Note that the - * AFU sync should _not_ be performed when the context is sitting on the error - * recovery list. A context on the error recovery list is not known to the AFU - * due to reset. When the context is recovered, it will be reattached and made - * known again to the AFU. - * - * Return: 0 on success, -errno on failure - */ -int _cxlflash_disk_release(struct scsi_device *sdev, - struct ctx_info *ctxi, - struct dk_cxlflash_release *release) -{ - struct cxlflash_cfg *cfg = shost_priv(sdev->host); - struct device *dev = &cfg->dev->dev; - struct llun_info *lli = sdev->hostdata; - struct glun_info *gli = lli->parent; - struct afu *afu = cfg->afu; - bool put_ctx = false; - - struct dk_cxlflash_resize size; - res_hndl_t rhndl = release->rsrc_handle; - - int rc = 0; - int rcr = 0; - u64 ctxid = DECODE_CTXID(release->context_id), - rctxid = release->context_id; - - struct sisl_rht_entry *rhte; - struct sisl_rht_entry_f1 *rhte_f1; - - dev_dbg(dev, "%s: ctxid=%llu rhndl=%llu gli->mode=%u gli->users=%u\n", - __func__, ctxid, release->rsrc_handle, gli->mode, gli->users); - - if (!ctxi) { - ctxi = get_context(cfg, rctxid, lli, CTX_CTRL_ERR_FALLBACK); - if (unlikely(!ctxi)) { - dev_dbg(dev, "%s: Bad context ctxid=%llu\n", - __func__, ctxid); - rc = -EINVAL; - goto out; - } - - put_ctx = true; - } - - rhte = get_rhte(ctxi, rhndl, lli); - if (unlikely(!rhte)) { - dev_dbg(dev, "%s: Bad resource handle rhndl=%d\n", - __func__, rhndl); - rc = -EINVAL; - goto out; - } - - /* - * Resize to 0 for virtual LUNS by setting the size - * to 0. This will clear LXT_START and LXT_CNT fields - * in the RHT entry and properly sync with the AFU. - * - * Afterwards we clear the remaining fields. - */ - switch (gli->mode) { - case MODE_VIRTUAL: - marshal_rele_to_resize(release, &size); - size.req_size = 0; - rc = _cxlflash_vlun_resize(sdev, ctxi, &size); - if (rc) { - dev_dbg(dev, "%s: resize failed rc %d\n", __func__, rc); - goto out; - } - - break; - case MODE_PHYSICAL: - /* - * Clear the Format 1 RHT entry for direct access - * (physical LUN) using the synchronization sequence - * defined in the SISLite specification. - */ - rhte_f1 = (struct sisl_rht_entry_f1 *)rhte; - - rhte_f1->valid = 0; - dma_wmb(); /* Make revocation of RHT entry visible */ - - rhte_f1->lun_id = 0; - dma_wmb(); /* Make clearing of LUN id visible */ - - rhte_f1->dw = 0; - dma_wmb(); /* Make RHT entry bottom-half clearing visible */ - - if (!ctxi->err_recovery_active) { - rcr = cxlflash_afu_sync(afu, ctxid, rhndl, AFU_HW_SYNC); - if (unlikely(rcr)) - dev_dbg(dev, "%s: AFU sync failed rc=%d\n", - __func__, rcr); - } - break; - default: - WARN(1, "Unsupported LUN mode!"); - goto out; - } - - rhte_checkin(ctxi, rhte); - cxlflash_lun_detach(gli); - -out: - if (put_ctx) - put_context(ctxi); - dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc); - return rc; -} - -int cxlflash_disk_release(struct scsi_device *sdev, void *release) -{ - return _cxlflash_disk_release(sdev, NULL, release); -} - -/** - * destroy_context() - releases a context - * @cfg: Internal structure associated with the host. - * @ctxi: Context to release. - * - * This routine is safe to be called with a a non-initialized context. - * Also note that the routine conditionally checks for the existence - * of the context control map before clearing the RHT registers and - * context capabilities because it is possible to destroy a context - * while the context is in the error state (previous mapping was - * removed [so there is no need to worry about clearing] and context - * is waiting for a new mapping). - */ -static void destroy_context(struct cxlflash_cfg *cfg, - struct ctx_info *ctxi) -{ - struct afu *afu = cfg->afu; - - if (ctxi->initialized) { - WARN_ON(!list_empty(&ctxi->luns)); - - /* Clear RHT registers and drop all capabilities for context */ - if (afu->afu_map && ctxi->ctrl_map) { - writeq_be(0, &ctxi->ctrl_map->rht_start); - writeq_be(0, &ctxi->ctrl_map->rht_cnt_id); - writeq_be(0, &ctxi->ctrl_map->ctx_cap); - } - } - - /* Free memory associated with context */ - free_page((ulong)ctxi->rht_start); - kfree(ctxi->rht_needs_ws); - kfree(ctxi->rht_lun); - kfree(ctxi); -} - -/** - * create_context() - allocates and initializes a context - * @cfg: Internal structure associated with the host. - * - * Return: Allocated context on success, NULL on failure - */ -static struct ctx_info *create_context(struct cxlflash_cfg *cfg) -{ - struct device *dev = &cfg->dev->dev; - struct ctx_info *ctxi = NULL; - struct llun_info **lli = NULL; - u8 *ws = NULL; - struct sisl_rht_entry *rhte; - - ctxi = kzalloc(sizeof(*ctxi), GFP_KERNEL); - lli = kzalloc((MAX_RHT_PER_CONTEXT * sizeof(*lli)), GFP_KERNEL); - ws = kzalloc((MAX_RHT_PER_CONTEXT * sizeof(*ws)), GFP_KERNEL); - if (unlikely(!ctxi || !lli || !ws)) { - dev_err(dev, "%s: Unable to allocate context\n", __func__); - goto err; - } - - rhte = (struct sisl_rht_entry *)get_zeroed_page(GFP_KERNEL); - if (unlikely(!rhte)) { - dev_err(dev, "%s: Unable to allocate RHT\n", __func__); - goto err; - } - - ctxi->rht_lun = lli; - ctxi->rht_needs_ws = ws; - ctxi->rht_start = rhte; -out: - return ctxi; - -err: - kfree(ws); - kfree(lli); - kfree(ctxi); - ctxi = NULL; - goto out; -} - -/** - * init_context() - initializes a previously allocated context - * @ctxi: Previously allocated context - * @cfg: Internal structure associated with the host. - * @ctx: Previously obtained context cookie. - * @ctxid: Previously obtained process element associated with CXL context. - * @file: Previously obtained file associated with CXL context. - * @perms: User-specified permissions. - * @irqs: User-specified number of interrupts. - */ -static void init_context(struct ctx_info *ctxi, struct cxlflash_cfg *cfg, - void *ctx, int ctxid, struct file *file, u32 perms, - u64 irqs) -{ - struct afu *afu = cfg->afu; - - ctxi->rht_perms = perms; - ctxi->ctrl_map = &afu->afu_map->ctrls[ctxid].ctrl; - ctxi->ctxid = ENCODE_CTXID(ctxi, ctxid); - ctxi->irqs = irqs; - ctxi->pid = task_tgid_nr(current); /* tgid = pid */ - ctxi->ctx = ctx; - ctxi->cfg = cfg; - ctxi->file = file; - ctxi->initialized = true; - mutex_init(&ctxi->mutex); - kref_init(&ctxi->kref); - INIT_LIST_HEAD(&ctxi->luns); - INIT_LIST_HEAD(&ctxi->list); /* initialize for list_empty() */ -} - -/** - * remove_context() - context kref release handler - * @kref: Kernel reference associated with context to be removed. - * - * When a context no longer has any references it can safely be removed - * from global access and destroyed. Note that it is assumed the thread - * relinquishing access to the context holds its mutex. - */ -static void remove_context(struct kref *kref) -{ - struct ctx_info *ctxi = container_of(kref, struct ctx_info, kref); - struct cxlflash_cfg *cfg = ctxi->cfg; - u64 ctxid = DECODE_CTXID(ctxi->ctxid); - - /* Remove context from table/error list */ - WARN_ON(!mutex_is_locked(&ctxi->mutex)); - ctxi->unavail = true; - mutex_unlock(&ctxi->mutex); - mutex_lock(&cfg->ctx_tbl_list_mutex); - mutex_lock(&ctxi->mutex); - - if (!list_empty(&ctxi->list)) - list_del(&ctxi->list); - cfg->ctx_tbl[ctxid] = NULL; - mutex_unlock(&cfg->ctx_tbl_list_mutex); - mutex_unlock(&ctxi->mutex); - - /* Context now completely uncoupled/unreachable */ - destroy_context(cfg, ctxi); -} - -/** - * _cxlflash_disk_detach() - detaches a LUN from a context - * @sdev: SCSI device associated with LUN. - * @ctxi: Context owning resources. - * @detach: Detach ioctl data structure. - * - * As part of the detach, all per-context resources associated with the LUN - * are cleaned up. When detaching the last LUN for a context, the context - * itself is cleaned up and released. - * - * Return: 0 on success, -errno on failure - */ -static int _cxlflash_disk_detach(struct scsi_device *sdev, - struct ctx_info *ctxi, - struct dk_cxlflash_detach *detach) -{ - struct cxlflash_cfg *cfg = shost_priv(sdev->host); - struct device *dev = &cfg->dev->dev; - struct llun_info *lli = sdev->hostdata; - struct lun_access *lun_access, *t; - struct dk_cxlflash_release rel; - bool put_ctx = false; - - int i; - int rc = 0; - u64 ctxid = DECODE_CTXID(detach->context_id), - rctxid = detach->context_id; - - dev_dbg(dev, "%s: ctxid=%llu\n", __func__, ctxid); - - if (!ctxi) { - ctxi = get_context(cfg, rctxid, lli, CTX_CTRL_ERR_FALLBACK); - if (unlikely(!ctxi)) { - dev_dbg(dev, "%s: Bad context ctxid=%llu\n", - __func__, ctxid); - rc = -EINVAL; - goto out; - } - - put_ctx = true; - } - - /* Cleanup outstanding resources tied to this LUN */ - if (ctxi->rht_out) { - marshal_det_to_rele(detach, &rel); - for (i = 0; i < MAX_RHT_PER_CONTEXT; i++) { - if (ctxi->rht_lun[i] == lli) { - rel.rsrc_handle = i; - _cxlflash_disk_release(sdev, ctxi, &rel); - } - - /* No need to loop further if we're done */ - if (ctxi->rht_out == 0) - break; - } - } - - /* Take our LUN out of context, free the node */ - list_for_each_entry_safe(lun_access, t, &ctxi->luns, list) - if (lun_access->lli == lli) { - list_del(&lun_access->list); - kfree(lun_access); - lun_access = NULL; - break; - } - - /* - * Release the context reference and the sdev reference that - * bound this LUN to the context. - */ - if (kref_put(&ctxi->kref, remove_context)) - put_ctx = false; - scsi_device_put(sdev); -out: - if (put_ctx) - put_context(ctxi); - dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc); - return rc; -} - -static int cxlflash_disk_detach(struct scsi_device *sdev, void *detach) -{ - return _cxlflash_disk_detach(sdev, NULL, detach); -} - -/** - * cxlflash_cxl_release() - release handler for adapter file descriptor - * @inode: File-system inode associated with fd. - * @file: File installed with adapter file descriptor. - * - * This routine is the release handler for the fops registered with - * the CXL services on an initial attach for a context. It is called - * when a close (explicitly by the user or as part of a process tear - * down) is performed on the adapter file descriptor returned to the - * user. The user should be aware that explicitly performing a close - * considered catastrophic and subsequent usage of the superpipe API - * with previously saved off tokens will fail. - * - * This routine derives the context reference and calls detach for - * each LUN associated with the context.The final detach operation - * causes the context itself to be freed. With exception to when the - * CXL process element (context id) lookup fails (a case that should - * theoretically never occur), every call into this routine results - * in a complete freeing of a context. - * - * Detaching the LUN is typically an ioctl() operation and the underlying - * code assumes that ioctl_rwsem has been acquired as a reader. To support - * that design point, the semaphore is acquired and released around detach. - * - * Return: 0 on success - */ -static int cxlflash_cxl_release(struct inode *inode, struct file *file) -{ - struct cxlflash_cfg *cfg = container_of(file->f_op, struct cxlflash_cfg, - cxl_fops); - void *ctx = cfg->ops->fops_get_context(file); - struct device *dev = &cfg->dev->dev; - struct ctx_info *ctxi = NULL; - struct dk_cxlflash_detach detach = { { 0 }, 0 }; - struct lun_access *lun_access, *t; - enum ctx_ctrl ctrl = CTX_CTRL_ERR_FALLBACK | CTX_CTRL_FILE; - int ctxid; - - ctxid = cfg->ops->process_element(ctx); - if (unlikely(ctxid < 0)) { - dev_err(dev, "%s: Context %p was closed ctxid=%d\n", - __func__, ctx, ctxid); - goto out; - } - - ctxi = get_context(cfg, ctxid, file, ctrl); - if (unlikely(!ctxi)) { - ctxi = get_context(cfg, ctxid, file, ctrl | CTX_CTRL_CLONE); - if (!ctxi) { - dev_dbg(dev, "%s: ctxid=%d already free\n", - __func__, ctxid); - goto out_release; - } - - dev_dbg(dev, "%s: Another process owns ctxid=%d\n", - __func__, ctxid); - put_context(ctxi); - goto out; - } - - dev_dbg(dev, "%s: close for ctxid=%d\n", __func__, ctxid); - - down_read(&cfg->ioctl_rwsem); - detach.context_id = ctxi->ctxid; - list_for_each_entry_safe(lun_access, t, &ctxi->luns, list) - _cxlflash_disk_detach(lun_access->sdev, ctxi, &detach); - up_read(&cfg->ioctl_rwsem); -out_release: - cfg->ops->fd_release(inode, file); -out: - dev_dbg(dev, "%s: returning\n", __func__); - return 0; -} - -/** - * unmap_context() - clears a previously established mapping - * @ctxi: Context owning the mapping. - * - * This routine is used to switch between the error notification page - * (dummy page of all 1's) and the real mapping (established by the CXL - * fault handler). - */ -static void unmap_context(struct ctx_info *ctxi) -{ - unmap_mapping_range(ctxi->file->f_mapping, 0, 0, 1); -} - -/** - * get_err_page() - obtains and allocates the error notification page - * @cfg: Internal structure associated with the host. - * - * Return: error notification page on success, NULL on failure - */ -static struct page *get_err_page(struct cxlflash_cfg *cfg) -{ - struct page *err_page = global.err_page; - struct device *dev = &cfg->dev->dev; - - if (unlikely(!err_page)) { - err_page = alloc_page(GFP_KERNEL); - if (unlikely(!err_page)) { - dev_err(dev, "%s: Unable to allocate err_page\n", - __func__); - goto out; - } - - memset(page_address(err_page), -1, PAGE_SIZE); - - /* Serialize update w/ other threads to avoid a leak */ - mutex_lock(&global.mutex); - if (likely(!global.err_page)) - global.err_page = err_page; - else { - __free_page(err_page); - err_page = global.err_page; - } - mutex_unlock(&global.mutex); - } - -out: - dev_dbg(dev, "%s: returning err_page=%p\n", __func__, err_page); - return err_page; -} - -/** - * cxlflash_mmap_fault() - mmap fault handler for adapter file descriptor - * @vmf: VM fault associated with current fault. - * - * To support error notification via MMIO, faults are 'caught' by this routine - * that was inserted before passing back the adapter file descriptor on attach. - * When a fault occurs, this routine evaluates if error recovery is active and - * if so, installs the error page to 'notify' the user about the error state. - * During normal operation, the fault is simply handled by the original fault - * handler that was installed by CXL services as part of initializing the - * adapter file descriptor. The VMA's page protection bits are toggled to - * indicate cached/not-cached depending on the memory backing the fault. - * - * Return: 0 on success, VM_FAULT_SIGBUS on failure - */ -static vm_fault_t cxlflash_mmap_fault(struct vm_fault *vmf) -{ - struct vm_area_struct *vma = vmf->vma; - struct file *file = vma->vm_file; - struct cxlflash_cfg *cfg = container_of(file->f_op, struct cxlflash_cfg, - cxl_fops); - void *ctx = cfg->ops->fops_get_context(file); - struct device *dev = &cfg->dev->dev; - struct ctx_info *ctxi = NULL; - struct page *err_page = NULL; - enum ctx_ctrl ctrl = CTX_CTRL_ERR_FALLBACK | CTX_CTRL_FILE; - vm_fault_t rc = 0; - int ctxid; - - ctxid = cfg->ops->process_element(ctx); - if (unlikely(ctxid < 0)) { - dev_err(dev, "%s: Context %p was closed ctxid=%d\n", - __func__, ctx, ctxid); - goto err; - } - - ctxi = get_context(cfg, ctxid, file, ctrl); - if (unlikely(!ctxi)) { - dev_dbg(dev, "%s: Bad context ctxid=%d\n", __func__, ctxid); - goto err; - } - - dev_dbg(dev, "%s: fault for context %d\n", __func__, ctxid); - - if (likely(!ctxi->err_recovery_active)) { - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - rc = ctxi->cxl_mmap_vmops->fault(vmf); - } else { - dev_dbg(dev, "%s: err recovery active, use err_page\n", - __func__); - - err_page = get_err_page(cfg); - if (unlikely(!err_page)) { - dev_err(dev, "%s: Could not get err_page\n", __func__); - rc = VM_FAULT_RETRY; - goto out; - } - - get_page(err_page); - vmf->page = err_page; - vma->vm_page_prot = pgprot_cached(vma->vm_page_prot); - } - -out: - if (likely(ctxi)) - put_context(ctxi); - dev_dbg(dev, "%s: returning rc=%x\n", __func__, rc); - return rc; - -err: - rc = VM_FAULT_SIGBUS; - goto out; -} - -/* - * Local MMAP vmops to 'catch' faults - */ -static const struct vm_operations_struct cxlflash_mmap_vmops = { - .fault = cxlflash_mmap_fault, -}; - -/** - * cxlflash_cxl_mmap() - mmap handler for adapter file descriptor - * @file: File installed with adapter file descriptor. - * @vma: VM area associated with mapping. - * - * Installs local mmap vmops to 'catch' faults for error notification support. - * - * Return: 0 on success, -errno on failure - */ -static int cxlflash_cxl_mmap(struct file *file, struct vm_area_struct *vma) -{ - struct cxlflash_cfg *cfg = container_of(file->f_op, struct cxlflash_cfg, - cxl_fops); - void *ctx = cfg->ops->fops_get_context(file); - struct device *dev = &cfg->dev->dev; - struct ctx_info *ctxi = NULL; - enum ctx_ctrl ctrl = CTX_CTRL_ERR_FALLBACK | CTX_CTRL_FILE; - int ctxid; - int rc = 0; - - ctxid = cfg->ops->process_element(ctx); - if (unlikely(ctxid < 0)) { - dev_err(dev, "%s: Context %p was closed ctxid=%d\n", - __func__, ctx, ctxid); - rc = -EIO; - goto out; - } - - ctxi = get_context(cfg, ctxid, file, ctrl); - if (unlikely(!ctxi)) { - dev_dbg(dev, "%s: Bad context ctxid=%d\n", __func__, ctxid); - rc = -EIO; - goto out; - } - - dev_dbg(dev, "%s: mmap for context %d\n", __func__, ctxid); - - rc = cfg->ops->fd_mmap(file, vma); - if (likely(!rc)) { - /* Insert ourself in the mmap fault handler path */ - ctxi->cxl_mmap_vmops = vma->vm_ops; - vma->vm_ops = &cxlflash_mmap_vmops; - } - -out: - if (likely(ctxi)) - put_context(ctxi); - return rc; -} - -const struct file_operations cxlflash_cxl_fops = { - .owner = THIS_MODULE, - .mmap = cxlflash_cxl_mmap, - .release = cxlflash_cxl_release, -}; - -/** - * cxlflash_mark_contexts_error() - move contexts to error state and list - * @cfg: Internal structure associated with the host. - * - * A context is only moved over to the error list when there are no outstanding - * references to it. This ensures that a running operation has completed. - * - * Return: 0 on success, -errno on failure - */ -int cxlflash_mark_contexts_error(struct cxlflash_cfg *cfg) -{ - int i, rc = 0; - struct ctx_info *ctxi = NULL; - - mutex_lock(&cfg->ctx_tbl_list_mutex); - - for (i = 0; i < MAX_CONTEXT; i++) { - ctxi = cfg->ctx_tbl[i]; - if (ctxi) { - mutex_lock(&ctxi->mutex); - cfg->ctx_tbl[i] = NULL; - list_add(&ctxi->list, &cfg->ctx_err_recovery); - ctxi->err_recovery_active = true; - ctxi->ctrl_map = NULL; - unmap_context(ctxi); - mutex_unlock(&ctxi->mutex); - } - } - - mutex_unlock(&cfg->ctx_tbl_list_mutex); - return rc; -} - -/* - * Dummy NULL fops - */ -static const struct file_operations null_fops = { - .owner = THIS_MODULE, -}; - -/** - * check_state() - checks and responds to the current adapter state - * @cfg: Internal structure associated with the host. - * - * This routine can block and should only be used on process context. - * It assumes that the caller is an ioctl thread and holding the ioctl - * read semaphore. This is temporarily let up across the wait to allow - * for draining actively running ioctls. Also note that when waking up - * from waiting in reset, the state is unknown and must be checked again - * before proceeding. - * - * Return: 0 on success, -errno on failure - */ -int check_state(struct cxlflash_cfg *cfg) -{ - struct device *dev = &cfg->dev->dev; - int rc = 0; - -retry: - switch (cfg->state) { - case STATE_RESET: - dev_dbg(dev, "%s: Reset state, going to wait...\n", __func__); - up_read(&cfg->ioctl_rwsem); - rc = wait_event_interruptible(cfg->reset_waitq, - cfg->state != STATE_RESET); - down_read(&cfg->ioctl_rwsem); - if (unlikely(rc)) - break; - goto retry; - case STATE_FAILTERM: - dev_dbg(dev, "%s: Failed/Terminating\n", __func__); - rc = -ENODEV; - break; - default: - break; - } - - return rc; -} - -/** - * cxlflash_disk_attach() - attach a LUN to a context - * @sdev: SCSI device associated with LUN. - * @arg: Attach ioctl data structure. - * - * Creates a context and attaches LUN to it. A LUN can only be attached - * one time to a context (subsequent attaches for the same context/LUN pair - * are not supported). Additional LUNs can be attached to a context by - * specifying the 'reuse' flag defined in the cxlflash_ioctl.h header. - * - * Return: 0 on success, -errno on failure - */ -static int cxlflash_disk_attach(struct scsi_device *sdev, void *arg) -{ - struct dk_cxlflash_attach *attach = arg; - struct cxlflash_cfg *cfg = shost_priv(sdev->host); - struct device *dev = &cfg->dev->dev; - struct afu *afu = cfg->afu; - struct llun_info *lli = sdev->hostdata; - struct glun_info *gli = lli->parent; - struct ctx_info *ctxi = NULL; - struct lun_access *lun_access = NULL; - int rc = 0; - u32 perms; - int ctxid = -1; - u64 irqs = attach->num_interrupts; - u64 flags = 0UL; - u64 rctxid = 0UL; - struct file *file = NULL; - - void *ctx = NULL; - - int fd = -1; - - if (irqs > 4) { - dev_dbg(dev, "%s: Cannot support this many interrupts %llu\n", - __func__, irqs); - rc = -EINVAL; - goto out; - } - - if (gli->max_lba == 0) { - dev_dbg(dev, "%s: No capacity info for LUN=%016llx\n", - __func__, lli->lun_id[sdev->channel]); - rc = read_cap16(sdev, lli); - if (rc) { - dev_err(dev, "%s: Invalid device rc=%d\n", - __func__, rc); - rc = -ENODEV; - goto out; - } - dev_dbg(dev, "%s: LBA = %016llx\n", __func__, gli->max_lba); - dev_dbg(dev, "%s: BLK_LEN = %08x\n", __func__, gli->blk_len); - } - - if (attach->hdr.flags & DK_CXLFLASH_ATTACH_REUSE_CONTEXT) { - rctxid = attach->context_id; - ctxi = get_context(cfg, rctxid, NULL, 0); - if (!ctxi) { - dev_dbg(dev, "%s: Bad context rctxid=%016llx\n", - __func__, rctxid); - rc = -EINVAL; - goto out; - } - - list_for_each_entry(lun_access, &ctxi->luns, list) - if (lun_access->lli == lli) { - dev_dbg(dev, "%s: Already attached\n", - __func__); - rc = -EINVAL; - goto out; - } - } - - rc = scsi_device_get(sdev); - if (unlikely(rc)) { - dev_err(dev, "%s: Unable to get sdev reference\n", __func__); - goto out; - } - - lun_access = kzalloc(sizeof(*lun_access), GFP_KERNEL); - if (unlikely(!lun_access)) { - dev_err(dev, "%s: Unable to allocate lun_access\n", __func__); - rc = -ENOMEM; - goto err; - } - - lun_access->lli = lli; - lun_access->sdev = sdev; - - /* Non-NULL context indicates reuse (another context reference) */ - if (ctxi) { - dev_dbg(dev, "%s: Reusing context for LUN rctxid=%016llx\n", - __func__, rctxid); - kref_get(&ctxi->kref); - list_add(&lun_access->list, &ctxi->luns); - goto out_attach; - } - - ctxi = create_context(cfg); - if (unlikely(!ctxi)) { - dev_err(dev, "%s: Failed to create context ctxid=%d\n", - __func__, ctxid); - rc = -ENOMEM; - goto err; - } - - ctx = cfg->ops->dev_context_init(cfg->dev, cfg->afu_cookie); - if (IS_ERR_OR_NULL(ctx)) { - dev_err(dev, "%s: Could not initialize context %p\n", - __func__, ctx); - rc = -ENODEV; - goto err; - } - - rc = cfg->ops->start_work(ctx, irqs); - if (unlikely(rc)) { - dev_dbg(dev, "%s: Could not start context rc=%d\n", - __func__, rc); - goto err; - } - - ctxid = cfg->ops->process_element(ctx); - if (unlikely((ctxid >= MAX_CONTEXT) || (ctxid < 0))) { - dev_err(dev, "%s: ctxid=%d invalid\n", __func__, ctxid); - rc = -EPERM; - goto err; - } - - file = cfg->ops->get_fd(ctx, &cfg->cxl_fops, &fd); - if (unlikely(fd < 0)) { - rc = -ENODEV; - dev_err(dev, "%s: Could not get file descriptor\n", __func__); - goto err; - } - - /* Translate read/write O_* flags from fcntl.h to AFU permission bits */ - perms = SISL_RHT_PERM(attach->hdr.flags + 1); - - /* Context mutex is locked upon return */ - init_context(ctxi, cfg, ctx, ctxid, file, perms, irqs); - - rc = afu_attach(cfg, ctxi); - if (unlikely(rc)) { - dev_err(dev, "%s: Could not attach AFU rc %d\n", __func__, rc); - goto err; - } - - /* - * No error paths after this point. Once the fd is installed it's - * visible to user space and can't be undone safely on this thread. - * There is no need to worry about a deadlock here because no one - * knows about us yet; we can be the only one holding our mutex. - */ - list_add(&lun_access->list, &ctxi->luns); - mutex_lock(&cfg->ctx_tbl_list_mutex); - mutex_lock(&ctxi->mutex); - cfg->ctx_tbl[ctxid] = ctxi; - mutex_unlock(&cfg->ctx_tbl_list_mutex); - fd_install(fd, file); - -out_attach: - if (fd != -1) - flags |= DK_CXLFLASH_APP_CLOSE_ADAP_FD; - if (afu_is_sq_cmd_mode(afu)) - flags |= DK_CXLFLASH_CONTEXT_SQ_CMD_MODE; - - attach->hdr.return_flags = flags; - attach->context_id = ctxi->ctxid; - attach->block_size = gli->blk_len; - attach->mmio_size = sizeof(afu->afu_map->hosts[0].harea); - attach->last_lba = gli->max_lba; - attach->max_xfer = sdev->host->max_sectors * MAX_SECTOR_UNIT; - attach->max_xfer /= gli->blk_len; - -out: - attach->adap_fd = fd; - - if (ctxi) - put_context(ctxi); - - dev_dbg(dev, "%s: returning ctxid=%d fd=%d bs=%lld rc=%d llba=%lld\n", - __func__, ctxid, fd, attach->block_size, rc, attach->last_lba); - return rc; - -err: - /* Cleanup CXL context; okay to 'stop' even if it was not started */ - if (!IS_ERR_OR_NULL(ctx)) { - cfg->ops->stop_context(ctx); - cfg->ops->release_context(ctx); - ctx = NULL; - } - - /* - * Here, we're overriding the fops with a dummy all-NULL fops because - * fput() calls the release fop, which will cause us to mistakenly - * call into the CXL code. Rather than try to add yet more complexity - * to that routine (cxlflash_cxl_release) we should try to fix the - * issue here. - */ - if (fd > 0) { - file->f_op = &null_fops; - fput(file); - put_unused_fd(fd); - fd = -1; - file = NULL; - } - - /* Cleanup our context */ - if (ctxi) { - destroy_context(cfg, ctxi); - ctxi = NULL; - } - - kfree(lun_access); - scsi_device_put(sdev); - goto out; -} - -/** - * recover_context() - recovers a context in error - * @cfg: Internal structure associated with the host. - * @ctxi: Context to release. - * @adap_fd: Adapter file descriptor associated with new/recovered context. - * - * Restablishes the state for a context-in-error. - * - * Return: 0 on success, -errno on failure - */ -static int recover_context(struct cxlflash_cfg *cfg, - struct ctx_info *ctxi, - int *adap_fd) -{ - struct device *dev = &cfg->dev->dev; - int rc = 0; - int fd = -1; - int ctxid = -1; - struct file *file; - void *ctx; - struct afu *afu = cfg->afu; - - ctx = cfg->ops->dev_context_init(cfg->dev, cfg->afu_cookie); - if (IS_ERR_OR_NULL(ctx)) { - dev_err(dev, "%s: Could not initialize context %p\n", - __func__, ctx); - rc = -ENODEV; - goto out; - } - - rc = cfg->ops->start_work(ctx, ctxi->irqs); - if (unlikely(rc)) { - dev_dbg(dev, "%s: Could not start context rc=%d\n", - __func__, rc); - goto err1; - } - - ctxid = cfg->ops->process_element(ctx); - if (unlikely((ctxid >= MAX_CONTEXT) || (ctxid < 0))) { - dev_err(dev, "%s: ctxid=%d invalid\n", __func__, ctxid); - rc = -EPERM; - goto err2; - } - - file = cfg->ops->get_fd(ctx, &cfg->cxl_fops, &fd); - if (unlikely(fd < 0)) { - rc = -ENODEV; - dev_err(dev, "%s: Could not get file descriptor\n", __func__); - goto err2; - } - - /* Update with new MMIO area based on updated context id */ - ctxi->ctrl_map = &afu->afu_map->ctrls[ctxid].ctrl; - - rc = afu_attach(cfg, ctxi); - if (rc) { - dev_err(dev, "%s: Could not attach AFU rc %d\n", __func__, rc); - goto err3; - } - - /* - * No error paths after this point. Once the fd is installed it's - * visible to user space and can't be undone safely on this thread. - */ - ctxi->ctxid = ENCODE_CTXID(ctxi, ctxid); - ctxi->ctx = ctx; - ctxi->file = file; - - /* - * Put context back in table (note the reinit of the context list); - * we must first drop the context's mutex and then acquire it in - * order with the table/list mutex to avoid a deadlock - safe to do - * here because no one can find us at this moment in time. - */ - mutex_unlock(&ctxi->mutex); - mutex_lock(&cfg->ctx_tbl_list_mutex); - mutex_lock(&ctxi->mutex); - list_del_init(&ctxi->list); - cfg->ctx_tbl[ctxid] = ctxi; - mutex_unlock(&cfg->ctx_tbl_list_mutex); - fd_install(fd, file); - *adap_fd = fd; -out: - dev_dbg(dev, "%s: returning ctxid=%d fd=%d rc=%d\n", - __func__, ctxid, fd, rc); - return rc; - -err3: - fput(file); - put_unused_fd(fd); -err2: - cfg->ops->stop_context(ctx); -err1: - cfg->ops->release_context(ctx); - goto out; -} - -/** - * cxlflash_afu_recover() - initiates AFU recovery - * @sdev: SCSI device associated with LUN. - * @arg: Recover ioctl data structure. - * - * Only a single recovery is allowed at a time to avoid exhausting CXL - * resources (leading to recovery failure) in the event that we're up - * against the maximum number of contexts limit. For similar reasons, - * a context recovery is retried if there are multiple recoveries taking - * place at the same time and the failure was due to CXL services being - * unable to keep up. - * - * As this routine is called on ioctl context, it holds the ioctl r/w - * semaphore that is used to drain ioctls in recovery scenarios. The - * implementation to achieve the pacing described above (a local mutex) - * requires that the ioctl r/w semaphore be dropped and reacquired to - * avoid a 3-way deadlock when multiple process recoveries operate in - * parallel. - * - * Because a user can detect an error condition before the kernel, it is - * quite possible for this routine to act as the kernel's EEH detection - * source (MMIO read of mbox_r). Because of this, there is a window of - * time where an EEH might have been detected but not yet 'serviced' - * (callback invoked, causing the device to enter reset state). To avoid - * looping in this routine during that window, a 1 second sleep is in place - * between the time the MMIO failure is detected and the time a wait on the - * reset wait queue is attempted via check_state(). - * - * Return: 0 on success, -errno on failure - */ -static int cxlflash_afu_recover(struct scsi_device *sdev, void *arg) -{ - struct dk_cxlflash_recover_afu *recover = arg; - struct cxlflash_cfg *cfg = shost_priv(sdev->host); - struct device *dev = &cfg->dev->dev; - struct llun_info *lli = sdev->hostdata; - struct afu *afu = cfg->afu; - struct ctx_info *ctxi = NULL; - struct mutex *mutex = &cfg->ctx_recovery_mutex; - struct hwq *hwq = get_hwq(afu, PRIMARY_HWQ); - u64 flags; - u64 ctxid = DECODE_CTXID(recover->context_id), - rctxid = recover->context_id; - long reg; - bool locked = true; - int lretry = 20; /* up to 2 seconds */ - int new_adap_fd = -1; - int rc = 0; - - atomic_inc(&cfg->recovery_threads); - up_read(&cfg->ioctl_rwsem); - rc = mutex_lock_interruptible(mutex); - down_read(&cfg->ioctl_rwsem); - if (rc) { - locked = false; - goto out; - } - - rc = check_state(cfg); - if (rc) { - dev_err(dev, "%s: Failed state rc=%d\n", __func__, rc); - rc = -ENODEV; - goto out; - } - - dev_dbg(dev, "%s: reason=%016llx rctxid=%016llx\n", - __func__, recover->reason, rctxid); - -retry: - /* Ensure that this process is attached to the context */ - ctxi = get_context(cfg, rctxid, lli, CTX_CTRL_ERR_FALLBACK); - if (unlikely(!ctxi)) { - dev_dbg(dev, "%s: Bad context ctxid=%llu\n", __func__, ctxid); - rc = -EINVAL; - goto out; - } - - if (ctxi->err_recovery_active) { -retry_recover: - rc = recover_context(cfg, ctxi, &new_adap_fd); - if (unlikely(rc)) { - dev_err(dev, "%s: Recovery failed ctxid=%llu rc=%d\n", - __func__, ctxid, rc); - if ((rc == -ENODEV) && - ((atomic_read(&cfg->recovery_threads) > 1) || - (lretry--))) { - dev_dbg(dev, "%s: Going to try again\n", - __func__); - mutex_unlock(mutex); - msleep(100); - rc = mutex_lock_interruptible(mutex); - if (rc) { - locked = false; - goto out; - } - goto retry_recover; - } - - goto out; - } - - ctxi->err_recovery_active = false; - - flags = DK_CXLFLASH_APP_CLOSE_ADAP_FD | - DK_CXLFLASH_RECOVER_AFU_CONTEXT_RESET; - if (afu_is_sq_cmd_mode(afu)) - flags |= DK_CXLFLASH_CONTEXT_SQ_CMD_MODE; - - recover->hdr.return_flags = flags; - recover->context_id = ctxi->ctxid; - recover->adap_fd = new_adap_fd; - recover->mmio_size = sizeof(afu->afu_map->hosts[0].harea); - goto out; - } - - /* Test if in error state */ - reg = readq_be(&hwq->ctrl_map->mbox_r); - if (reg == -1) { - dev_dbg(dev, "%s: MMIO fail, wait for recovery.\n", __func__); - - /* - * Before checking the state, put back the context obtained with - * get_context() as it is no longer needed and sleep for a short - * period of time (see prolog notes). - */ - put_context(ctxi); - ctxi = NULL; - ssleep(1); - rc = check_state(cfg); - if (unlikely(rc)) - goto out; - goto retry; - } - - dev_dbg(dev, "%s: MMIO working, no recovery required\n", __func__); -out: - if (likely(ctxi)) - put_context(ctxi); - if (locked) - mutex_unlock(mutex); - atomic_dec_if_positive(&cfg->recovery_threads); - return rc; -} - -/** - * process_sense() - evaluates and processes sense data - * @sdev: SCSI device associated with LUN. - * @verify: Verify ioctl data structure. - * - * Return: 0 on success, -errno on failure - */ -static int process_sense(struct scsi_device *sdev, - struct dk_cxlflash_verify *verify) -{ - struct cxlflash_cfg *cfg = shost_priv(sdev->host); - struct device *dev = &cfg->dev->dev; - struct llun_info *lli = sdev->hostdata; - struct glun_info *gli = lli->parent; - u64 prev_lba = gli->max_lba; - struct scsi_sense_hdr sshdr = { 0 }; - int rc = 0; - - rc = scsi_normalize_sense((const u8 *)&verify->sense_data, - DK_CXLFLASH_VERIFY_SENSE_LEN, &sshdr); - if (!rc) { - dev_err(dev, "%s: Failed to normalize sense data\n", __func__); - rc = -EINVAL; - goto out; - } - - switch (sshdr.sense_key) { - case NO_SENSE: - case RECOVERED_ERROR: - case NOT_READY: - break; - case UNIT_ATTENTION: - switch (sshdr.asc) { - case 0x29: /* Power on Reset or Device Reset */ - fallthrough; - case 0x2A: /* Device settings/capacity changed */ - rc = read_cap16(sdev, lli); - if (rc) { - rc = -ENODEV; - break; - } - if (prev_lba != gli->max_lba) - dev_dbg(dev, "%s: Capacity changed old=%lld " - "new=%lld\n", __func__, prev_lba, - gli->max_lba); - break; - case 0x3F: /* Report LUNs changed, Rescan. */ - scsi_scan_host(cfg->host); - break; - default: - rc = -EIO; - break; - } - break; - default: - rc = -EIO; - break; - } -out: - dev_dbg(dev, "%s: sense_key %x asc %x ascq %x rc %d\n", __func__, - sshdr.sense_key, sshdr.asc, sshdr.ascq, rc); - return rc; -} - -/** - * cxlflash_disk_verify() - verifies a LUN is the same and handle size changes - * @sdev: SCSI device associated with LUN. - * @arg: Verify ioctl data structure. - * - * Return: 0 on success, -errno on failure - */ -static int cxlflash_disk_verify(struct scsi_device *sdev, void *arg) -{ - struct dk_cxlflash_verify *verify = arg; - int rc = 0; - struct ctx_info *ctxi = NULL; - struct cxlflash_cfg *cfg = shost_priv(sdev->host); - struct device *dev = &cfg->dev->dev; - struct llun_info *lli = sdev->hostdata; - struct glun_info *gli = lli->parent; - struct sisl_rht_entry *rhte = NULL; - res_hndl_t rhndl = verify->rsrc_handle; - u64 ctxid = DECODE_CTXID(verify->context_id), - rctxid = verify->context_id; - u64 last_lba = 0; - - dev_dbg(dev, "%s: ctxid=%llu rhndl=%016llx, hint=%016llx, " - "flags=%016llx\n", __func__, ctxid, verify->rsrc_handle, - verify->hint, verify->hdr.flags); - - ctxi = get_context(cfg, rctxid, lli, 0); - if (unlikely(!ctxi)) { - dev_dbg(dev, "%s: Bad context ctxid=%llu\n", __func__, ctxid); - rc = -EINVAL; - goto out; - } - - rhte = get_rhte(ctxi, rhndl, lli); - if (unlikely(!rhte)) { - dev_dbg(dev, "%s: Bad resource handle rhndl=%d\n", - __func__, rhndl); - rc = -EINVAL; - goto out; - } - - /* - * Look at the hint/sense to see if it requires us to redrive - * inquiry (i.e. the Unit attention is due to the WWN changing). - */ - if (verify->hint & DK_CXLFLASH_VERIFY_HINT_SENSE) { - /* Can't hold mutex across process_sense/read_cap16, - * since we could have an intervening EEH event. - */ - ctxi->unavail = true; - mutex_unlock(&ctxi->mutex); - rc = process_sense(sdev, verify); - if (unlikely(rc)) { - dev_err(dev, "%s: Failed to validate sense data (%d)\n", - __func__, rc); - mutex_lock(&ctxi->mutex); - ctxi->unavail = false; - goto out; - } - mutex_lock(&ctxi->mutex); - ctxi->unavail = false; - } - - switch (gli->mode) { - case MODE_PHYSICAL: - last_lba = gli->max_lba; - break; - case MODE_VIRTUAL: - /* Cast lxt_cnt to u64 for multiply to be treated as 64bit op */ - last_lba = ((u64)rhte->lxt_cnt * MC_CHUNK_SIZE * gli->blk_len); - last_lba /= CXLFLASH_BLOCK_SIZE; - last_lba--; - break; - default: - WARN(1, "Unsupported LUN mode!"); - } - - verify->last_lba = last_lba; - -out: - if (likely(ctxi)) - put_context(ctxi); - dev_dbg(dev, "%s: returning rc=%d llba=%llx\n", - __func__, rc, verify->last_lba); - return rc; -} - -/** - * decode_ioctl() - translates an encoded ioctl to an easily identifiable string - * @cmd: The ioctl command to decode. - * - * Return: A string identifying the decoded ioctl. - */ -static char *decode_ioctl(unsigned int cmd) -{ - switch (cmd) { - case DK_CXLFLASH_ATTACH: - return __stringify_1(DK_CXLFLASH_ATTACH); - case DK_CXLFLASH_USER_DIRECT: - return __stringify_1(DK_CXLFLASH_USER_DIRECT); - case DK_CXLFLASH_USER_VIRTUAL: - return __stringify_1(DK_CXLFLASH_USER_VIRTUAL); - case DK_CXLFLASH_VLUN_RESIZE: - return __stringify_1(DK_CXLFLASH_VLUN_RESIZE); - case DK_CXLFLASH_RELEASE: - return __stringify_1(DK_CXLFLASH_RELEASE); - case DK_CXLFLASH_DETACH: - return __stringify_1(DK_CXLFLASH_DETACH); - case DK_CXLFLASH_VERIFY: - return __stringify_1(DK_CXLFLASH_VERIFY); - case DK_CXLFLASH_VLUN_CLONE: - return __stringify_1(DK_CXLFLASH_VLUN_CLONE); - case DK_CXLFLASH_RECOVER_AFU: - return __stringify_1(DK_CXLFLASH_RECOVER_AFU); - case DK_CXLFLASH_MANAGE_LUN: - return __stringify_1(DK_CXLFLASH_MANAGE_LUN); - } - - return "UNKNOWN"; -} - -/** - * cxlflash_disk_direct_open() - opens a direct (physical) disk - * @sdev: SCSI device associated with LUN. - * @arg: UDirect ioctl data structure. - * - * On successful return, the user is informed of the resource handle - * to be used to identify the direct lun and the size (in blocks) of - * the direct lun in last LBA format. - * - * Return: 0 on success, -errno on failure - */ -static int cxlflash_disk_direct_open(struct scsi_device *sdev, void *arg) -{ - struct cxlflash_cfg *cfg = shost_priv(sdev->host); - struct device *dev = &cfg->dev->dev; - struct afu *afu = cfg->afu; - struct llun_info *lli = sdev->hostdata; - struct glun_info *gli = lli->parent; - struct dk_cxlflash_release rel = { { 0 }, 0 }; - - struct dk_cxlflash_udirect *pphys = (struct dk_cxlflash_udirect *)arg; - - u64 ctxid = DECODE_CTXID(pphys->context_id), - rctxid = pphys->context_id; - u64 lun_size = 0; - u64 last_lba = 0; - u64 rsrc_handle = -1; - u32 port = CHAN2PORTMASK(sdev->channel); - - int rc = 0; - - struct ctx_info *ctxi = NULL; - struct sisl_rht_entry *rhte = NULL; - - dev_dbg(dev, "%s: ctxid=%llu ls=%llu\n", __func__, ctxid, lun_size); - - rc = cxlflash_lun_attach(gli, MODE_PHYSICAL, false); - if (unlikely(rc)) { - dev_dbg(dev, "%s: Failed attach to LUN (PHYSICAL)\n", __func__); - goto out; - } - - ctxi = get_context(cfg, rctxid, lli, 0); - if (unlikely(!ctxi)) { - dev_dbg(dev, "%s: Bad context ctxid=%llu\n", __func__, ctxid); - rc = -EINVAL; - goto err1; - } - - rhte = rhte_checkout(ctxi, lli); - if (unlikely(!rhte)) { - dev_dbg(dev, "%s: Too many opens ctxid=%lld\n", - __func__, ctxid); - rc = -EMFILE; /* too many opens */ - goto err1; - } - - rsrc_handle = (rhte - ctxi->rht_start); - - rht_format1(rhte, lli->lun_id[sdev->channel], ctxi->rht_perms, port); - - last_lba = gli->max_lba; - pphys->hdr.return_flags = 0; - pphys->last_lba = last_lba; - pphys->rsrc_handle = rsrc_handle; - - rc = cxlflash_afu_sync(afu, ctxid, rsrc_handle, AFU_LW_SYNC); - if (unlikely(rc)) { - dev_dbg(dev, "%s: AFU sync failed rc=%d\n", __func__, rc); - goto err2; - } - -out: - if (likely(ctxi)) - put_context(ctxi); - dev_dbg(dev, "%s: returning handle=%llu rc=%d llba=%llu\n", - __func__, rsrc_handle, rc, last_lba); - return rc; - -err2: - marshal_udir_to_rele(pphys, &rel); - _cxlflash_disk_release(sdev, ctxi, &rel); - goto out; -err1: - cxlflash_lun_detach(gli); - goto out; -} - -/** - * ioctl_common() - common IOCTL handler for driver - * @sdev: SCSI device associated with LUN. - * @cmd: IOCTL command. - * - * Handles common fencing operations that are valid for multiple ioctls. Always - * allow through ioctls that are cleanup oriented in nature, even when operating - * in a failed/terminating state. - * - * Return: 0 on success, -errno on failure - */ -static int ioctl_common(struct scsi_device *sdev, unsigned int cmd) -{ - struct cxlflash_cfg *cfg = shost_priv(sdev->host); - struct device *dev = &cfg->dev->dev; - struct llun_info *lli = sdev->hostdata; - int rc = 0; - - if (unlikely(!lli)) { - dev_dbg(dev, "%s: Unknown LUN\n", __func__); - rc = -EINVAL; - goto out; - } - - rc = check_state(cfg); - if (unlikely(rc) && (cfg->state == STATE_FAILTERM)) { - switch (cmd) { - case DK_CXLFLASH_VLUN_RESIZE: - case DK_CXLFLASH_RELEASE: - case DK_CXLFLASH_DETACH: - dev_dbg(dev, "%s: Command override rc=%d\n", - __func__, rc); - rc = 0; - break; - } - } -out: - return rc; -} - -/** - * cxlflash_ioctl() - IOCTL handler for driver - * @sdev: SCSI device associated with LUN. - * @cmd: IOCTL command. - * @arg: Userspace ioctl data structure. - * - * A read/write semaphore is used to implement a 'drain' of currently - * running ioctls. The read semaphore is taken at the beginning of each - * ioctl thread and released upon concluding execution. Additionally the - * semaphore should be released and then reacquired in any ioctl execution - * path which will wait for an event to occur that is outside the scope of - * the ioctl (i.e. an adapter reset). To drain the ioctls currently running, - * a thread simply needs to acquire the write semaphore. - * - * Return: 0 on success, -errno on failure - */ -int cxlflash_ioctl(struct scsi_device *sdev, unsigned int cmd, void __user *arg) -{ - typedef int (*sioctl) (struct scsi_device *, void *); - - struct cxlflash_cfg *cfg = shost_priv(sdev->host); - struct device *dev = &cfg->dev->dev; - struct afu *afu = cfg->afu; - struct dk_cxlflash_hdr *hdr; - char buf[sizeof(union cxlflash_ioctls)]; - size_t size = 0; - bool known_ioctl = false; - int idx; - int rc = 0; - struct Scsi_Host *shost = sdev->host; - sioctl do_ioctl = NULL; - - static const struct { - size_t size; - sioctl ioctl; - } ioctl_tbl[] = { /* NOTE: order matters here */ - {sizeof(struct dk_cxlflash_attach), cxlflash_disk_attach}, - {sizeof(struct dk_cxlflash_udirect), cxlflash_disk_direct_open}, - {sizeof(struct dk_cxlflash_release), cxlflash_disk_release}, - {sizeof(struct dk_cxlflash_detach), cxlflash_disk_detach}, - {sizeof(struct dk_cxlflash_verify), cxlflash_disk_verify}, - {sizeof(struct dk_cxlflash_recover_afu), cxlflash_afu_recover}, - {sizeof(struct dk_cxlflash_manage_lun), cxlflash_manage_lun}, - {sizeof(struct dk_cxlflash_uvirtual), cxlflash_disk_virtual_open}, - {sizeof(struct dk_cxlflash_resize), cxlflash_vlun_resize}, - {sizeof(struct dk_cxlflash_clone), cxlflash_disk_clone}, - }; - - /* Hold read semaphore so we can drain if needed */ - down_read(&cfg->ioctl_rwsem); - - /* Restrict command set to physical support only for internal LUN */ - if (afu->internal_lun) - switch (cmd) { - case DK_CXLFLASH_RELEASE: - case DK_CXLFLASH_USER_VIRTUAL: - case DK_CXLFLASH_VLUN_RESIZE: - case DK_CXLFLASH_VLUN_CLONE: - dev_dbg(dev, "%s: %s not supported for lun_mode=%d\n", - __func__, decode_ioctl(cmd), afu->internal_lun); - rc = -EINVAL; - goto cxlflash_ioctl_exit; - } - - switch (cmd) { - case DK_CXLFLASH_ATTACH: - case DK_CXLFLASH_USER_DIRECT: - case DK_CXLFLASH_RELEASE: - case DK_CXLFLASH_DETACH: - case DK_CXLFLASH_VERIFY: - case DK_CXLFLASH_RECOVER_AFU: - case DK_CXLFLASH_USER_VIRTUAL: - case DK_CXLFLASH_VLUN_RESIZE: - case DK_CXLFLASH_VLUN_CLONE: - dev_dbg(dev, "%s: %s (%08X) on dev(%d/%d/%d/%llu)\n", - __func__, decode_ioctl(cmd), cmd, shost->host_no, - sdev->channel, sdev->id, sdev->lun); - rc = ioctl_common(sdev, cmd); - if (unlikely(rc)) - goto cxlflash_ioctl_exit; - - fallthrough; - - case DK_CXLFLASH_MANAGE_LUN: - known_ioctl = true; - idx = _IOC_NR(cmd) - _IOC_NR(DK_CXLFLASH_ATTACH); - size = ioctl_tbl[idx].size; - do_ioctl = ioctl_tbl[idx].ioctl; - - if (likely(do_ioctl)) - break; - - fallthrough; - default: - rc = -EINVAL; - goto cxlflash_ioctl_exit; - } - - if (unlikely(copy_from_user(&buf, arg, size))) { - dev_err(dev, "%s: copy_from_user() fail size=%lu cmd=%u (%s) arg=%p\n", - __func__, size, cmd, decode_ioctl(cmd), arg); - rc = -EFAULT; - goto cxlflash_ioctl_exit; - } - - hdr = (struct dk_cxlflash_hdr *)&buf; - if (hdr->version != DK_CXLFLASH_VERSION_0) { - dev_dbg(dev, "%s: Version %u not supported for %s\n", - __func__, hdr->version, decode_ioctl(cmd)); - rc = -EINVAL; - goto cxlflash_ioctl_exit; - } - - if (hdr->rsvd[0] || hdr->rsvd[1] || hdr->rsvd[2] || hdr->return_flags) { - dev_dbg(dev, "%s: Reserved/rflags populated\n", __func__); - rc = -EINVAL; - goto cxlflash_ioctl_exit; - } - - rc = do_ioctl(sdev, (void *)&buf); - if (likely(!rc)) - if (unlikely(copy_to_user(arg, &buf, size))) { - dev_err(dev, "%s: copy_to_user() fail size=%lu cmd=%u (%s) arg=%p\n", - __func__, size, cmd, decode_ioctl(cmd), arg); - rc = -EFAULT; - } - - /* fall through to exit */ - -cxlflash_ioctl_exit: - up_read(&cfg->ioctl_rwsem); - if (unlikely(rc && known_ioctl)) - dev_err(dev, "%s: ioctl %s (%08X) on dev(%d/%d/%d/%llu) " - "returned rc %d\n", __func__, - decode_ioctl(cmd), cmd, shost->host_no, - sdev->channel, sdev->id, sdev->lun, rc); - else - dev_dbg(dev, "%s: ioctl %s (%08X) on dev(%d/%d/%d/%llu) " - "returned rc %d\n", __func__, decode_ioctl(cmd), - cmd, shost->host_no, sdev->channel, sdev->id, - sdev->lun, rc); - return rc; -} diff --git a/drivers/scsi/cxlflash/superpipe.h b/drivers/scsi/cxlflash/superpipe.h deleted file mode 100644 index fe8c975d13d7..000000000000 --- a/drivers/scsi/cxlflash/superpipe.h +++ /dev/null @@ -1,150 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * CXL Flash Device Driver - * - * Written by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>, IBM Corporation - * Matthew R. Ochs <mrochs@linux.vnet.ibm.com>, IBM Corporation - * - * Copyright (C) 2015 IBM Corporation - */ - -#ifndef _CXLFLASH_SUPERPIPE_H -#define _CXLFLASH_SUPERPIPE_H - -extern struct cxlflash_global global; - -/* - * Terminology: use afu (and not adapter) to refer to the HW. - * Adapter is the entire slot and includes PSL out of which - * only the AFU is visible to user space. - */ - -/* Chunk size parms: note sislite minimum chunk size is - * 0x10000 LBAs corresponding to a NMASK or 16. - */ -#define MC_CHUNK_SIZE (1 << MC_RHT_NMASK) /* in LBAs */ - -#define CMD_TIMEOUT 30 /* 30 secs */ -#define CMD_RETRIES 5 /* 5 retries for scsi_execute */ - -#define MAX_SECTOR_UNIT 512 /* max_sector is in 512 byte multiples */ - -enum lun_mode { - MODE_NONE = 0, - MODE_VIRTUAL, - MODE_PHYSICAL -}; - -/* Global (entire driver, spans adapters) lun_info structure */ -struct glun_info { - u64 max_lba; /* from read cap(16) */ - u32 blk_len; /* from read cap(16) */ - enum lun_mode mode; /* NONE, VIRTUAL, PHYSICAL */ - int users; /* Number of users w/ references to LUN */ - - u8 wwid[16]; - - struct mutex mutex; - - struct blka blka; - struct list_head list; -}; - -/* Local (per-adapter) lun_info structure */ -struct llun_info { - u64 lun_id[MAX_FC_PORTS]; /* from REPORT_LUNS */ - u32 lun_index; /* Index in the LUN table */ - u32 host_no; /* host_no from Scsi_host */ - u32 port_sel; /* What port to use for this LUN */ - bool in_table; /* Whether a LUN table entry was created */ - - u8 wwid[16]; /* Keep a duplicate copy here? */ - - struct glun_info *parent; /* Pointer to entry in global LUN structure */ - struct scsi_device *sdev; - struct list_head list; -}; - -struct lun_access { - struct llun_info *lli; - struct scsi_device *sdev; - struct list_head list; -}; - -enum ctx_ctrl { - CTX_CTRL_CLONE = (1 << 1), - CTX_CTRL_ERR = (1 << 2), - CTX_CTRL_ERR_FALLBACK = (1 << 3), - CTX_CTRL_NOPID = (1 << 4), - CTX_CTRL_FILE = (1 << 5) -}; - -#define ENCODE_CTXID(_ctx, _id) (((((u64)_ctx) & 0xFFFFFFFF0ULL) << 28) | _id) -#define DECODE_CTXID(_val) (_val & 0xFFFFFFFF) - -struct ctx_info { - struct sisl_ctrl_map __iomem *ctrl_map; /* initialized at startup */ - struct sisl_rht_entry *rht_start; /* 1 page (req'd for alignment), - * alloc/free on attach/detach - */ - u32 rht_out; /* Number of checked out RHT entries */ - u32 rht_perms; /* User-defined permissions for RHT entries */ - struct llun_info **rht_lun; /* Mapping of RHT entries to LUNs */ - u8 *rht_needs_ws; /* User-desired write-same function per RHTE */ - - u64 ctxid; - u64 irqs; /* Number of interrupts requested for context */ - pid_t pid; - bool initialized; - bool unavail; - bool err_recovery_active; - struct mutex mutex; /* Context protection */ - struct kref kref; - void *ctx; - struct cxlflash_cfg *cfg; - struct list_head luns; /* LUNs attached to this context */ - const struct vm_operations_struct *cxl_mmap_vmops; - struct file *file; - struct list_head list; /* Link contexts in error recovery */ -}; - -struct cxlflash_global { - struct mutex mutex; - struct list_head gluns;/* list of glun_info structs */ - struct page *err_page; /* One page of all 0xF for error notification */ -}; - -int cxlflash_vlun_resize(struct scsi_device *sdev, void *resize); -int _cxlflash_vlun_resize(struct scsi_device *sdev, struct ctx_info *ctxi, - struct dk_cxlflash_resize *resize); - -int cxlflash_disk_release(struct scsi_device *sdev, - void *release); -int _cxlflash_disk_release(struct scsi_device *sdev, struct ctx_info *ctxi, - struct dk_cxlflash_release *release); - -int cxlflash_disk_clone(struct scsi_device *sdev, void *arg); - -int cxlflash_disk_virtual_open(struct scsi_device *sdev, void *arg); - -int cxlflash_lun_attach(struct glun_info *gli, enum lun_mode mode, bool locked); -void cxlflash_lun_detach(struct glun_info *gli); - -struct ctx_info *get_context(struct cxlflash_cfg *cfg, u64 rctxit, void *arg, - enum ctx_ctrl ctrl); -void put_context(struct ctx_info *ctxi); - -struct sisl_rht_entry *get_rhte(struct ctx_info *ctxi, res_hndl_t rhndl, - struct llun_info *lli); - -struct sisl_rht_entry *rhte_checkout(struct ctx_info *ctxi, - struct llun_info *lli); -void rhte_checkin(struct ctx_info *ctxi, struct sisl_rht_entry *rhte); - -void cxlflash_ba_terminate(struct ba_lun *ba_lun); - -int cxlflash_manage_lun(struct scsi_device *sdev, void *manage); - -int check_state(struct cxlflash_cfg *cfg); - -#endif /* ifndef _CXLFLASH_SUPERPIPE_H */ diff --git a/drivers/scsi/cxlflash/vlun.c b/drivers/scsi/cxlflash/vlun.c deleted file mode 100644 index 32e807703377..000000000000 --- a/drivers/scsi/cxlflash/vlun.c +++ /dev/null @@ -1,1336 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * CXL Flash Device Driver - * - * Written by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>, IBM Corporation - * Matthew R. Ochs <mrochs@linux.vnet.ibm.com>, IBM Corporation - * - * Copyright (C) 2015 IBM Corporation - */ - -#include <linux/interrupt.h> -#include <linux/pci.h> -#include <linux/syscalls.h> -#include <linux/unaligned.h> -#include <asm/bitsperlong.h> - -#include <scsi/scsi_cmnd.h> -#include <scsi/scsi_host.h> -#include <uapi/scsi/cxlflash_ioctl.h> - -#include "sislite.h" -#include "common.h" -#include "vlun.h" -#include "superpipe.h" - -/** - * marshal_virt_to_resize() - translate uvirtual to resize structure - * @virt: Source structure from which to translate/copy. - * @resize: Destination structure for the translate/copy. - */ -static void marshal_virt_to_resize(struct dk_cxlflash_uvirtual *virt, - struct dk_cxlflash_resize *resize) -{ - resize->hdr = virt->hdr; - resize->context_id = virt->context_id; - resize->rsrc_handle = virt->rsrc_handle; - resize->req_size = virt->lun_size; - resize->last_lba = virt->last_lba; -} - -/** - * marshal_clone_to_rele() - translate clone to release structure - * @clone: Source structure from which to translate/copy. - * @release: Destination structure for the translate/copy. - */ -static void marshal_clone_to_rele(struct dk_cxlflash_clone *clone, - struct dk_cxlflash_release *release) -{ - release->hdr = clone->hdr; - release->context_id = clone->context_id_dst; -} - -/** - * ba_init() - initializes a block allocator - * @ba_lun: Block allocator to initialize. - * - * Return: 0 on success, -errno on failure - */ -static int ba_init(struct ba_lun *ba_lun) -{ - struct ba_lun_info *bali = NULL; - int lun_size_au = 0, i = 0; - int last_word_underflow = 0; - u64 *lam; - - pr_debug("%s: Initializing LUN: lun_id=%016llx " - "ba_lun->lsize=%lx ba_lun->au_size=%lX\n", - __func__, ba_lun->lun_id, ba_lun->lsize, ba_lun->au_size); - - /* Calculate bit map size */ - lun_size_au = ba_lun->lsize / ba_lun->au_size; - if (lun_size_au == 0) { - pr_debug("%s: Requested LUN size of 0!\n", __func__); - return -EINVAL; - } - - /* Allocate lun information container */ - bali = kzalloc(sizeof(struct ba_lun_info), GFP_KERNEL); - if (unlikely(!bali)) { - pr_err("%s: Failed to allocate lun_info lun_id=%016llx\n", - __func__, ba_lun->lun_id); - return -ENOMEM; - } - - bali->total_aus = lun_size_au; - bali->lun_bmap_size = lun_size_au / BITS_PER_LONG; - - if (lun_size_au % BITS_PER_LONG) - bali->lun_bmap_size++; - - /* Allocate bitmap space */ - bali->lun_alloc_map = kzalloc((bali->lun_bmap_size * sizeof(u64)), - GFP_KERNEL); - if (unlikely(!bali->lun_alloc_map)) { - pr_err("%s: Failed to allocate lun allocation map: " - "lun_id=%016llx\n", __func__, ba_lun->lun_id); - kfree(bali); - return -ENOMEM; - } - - /* Initialize the bit map size and set all bits to '1' */ - bali->free_aun_cnt = lun_size_au; - - for (i = 0; i < bali->lun_bmap_size; i++) - bali->lun_alloc_map[i] = 0xFFFFFFFFFFFFFFFFULL; - - /* If the last word not fully utilized, mark extra bits as allocated */ - last_word_underflow = (bali->lun_bmap_size * BITS_PER_LONG); - last_word_underflow -= bali->free_aun_cnt; - if (last_word_underflow > 0) { - lam = &bali->lun_alloc_map[bali->lun_bmap_size - 1]; - for (i = (HIBIT - last_word_underflow + 1); - i < BITS_PER_LONG; - i++) - clear_bit(i, (ulong *)lam); - } - - /* Initialize high elevator index, low/curr already at 0 from kzalloc */ - bali->free_high_idx = bali->lun_bmap_size; - - /* Allocate clone map */ - bali->aun_clone_map = kzalloc((bali->total_aus * sizeof(u8)), - GFP_KERNEL); - if (unlikely(!bali->aun_clone_map)) { - pr_err("%s: Failed to allocate clone map: lun_id=%016llx\n", - __func__, ba_lun->lun_id); - kfree(bali->lun_alloc_map); - kfree(bali); - return -ENOMEM; - } - - /* Pass the allocated LUN info as a handle to the user */ - ba_lun->ba_lun_handle = bali; - - pr_debug("%s: Successfully initialized the LUN: " - "lun_id=%016llx bitmap size=%x, free_aun_cnt=%llx\n", - __func__, ba_lun->lun_id, bali->lun_bmap_size, - bali->free_aun_cnt); - return 0; -} - -/** - * find_free_range() - locates a free bit within the block allocator - * @low: First word in block allocator to start search. - * @high: Last word in block allocator to search. - * @bali: LUN information structure owning the block allocator to search. - * @bit_word: Passes back the word in the block allocator owning the free bit. - * - * Return: The bit position within the passed back word, -1 on failure - */ -static int find_free_range(u32 low, - u32 high, - struct ba_lun_info *bali, int *bit_word) -{ - int i; - u64 bit_pos = -1; - ulong *lam, num_bits; - - for (i = low; i < high; i++) - if (bali->lun_alloc_map[i] != 0) { - lam = (ulong *)&bali->lun_alloc_map[i]; - num_bits = (sizeof(*lam) * BITS_PER_BYTE); - bit_pos = find_first_bit(lam, num_bits); - - pr_devel("%s: Found free bit %llu in LUN " - "map entry %016llx at bitmap index = %d\n", - __func__, bit_pos, bali->lun_alloc_map[i], i); - - *bit_word = i; - bali->free_aun_cnt--; - clear_bit(bit_pos, lam); - break; - } - - return bit_pos; -} - -/** - * ba_alloc() - allocates a block from the block allocator - * @ba_lun: Block allocator from which to allocate a block. - * - * Return: The allocated block, -1 on failure - */ -static u64 ba_alloc(struct ba_lun *ba_lun) -{ - u64 bit_pos = -1; - int bit_word = 0; - struct ba_lun_info *bali = NULL; - - bali = ba_lun->ba_lun_handle; - - pr_debug("%s: Received block allocation request: " - "lun_id=%016llx free_aun_cnt=%llx\n", - __func__, ba_lun->lun_id, bali->free_aun_cnt); - - if (bali->free_aun_cnt == 0) { - pr_debug("%s: No space left on LUN: lun_id=%016llx\n", - __func__, ba_lun->lun_id); - return -1ULL; - } - - /* Search to find a free entry, curr->high then low->curr */ - bit_pos = find_free_range(bali->free_curr_idx, - bali->free_high_idx, bali, &bit_word); - if (bit_pos == -1) { - bit_pos = find_free_range(bali->free_low_idx, - bali->free_curr_idx, - bali, &bit_word); - if (bit_pos == -1) { - pr_debug("%s: Could not find an allocation unit on LUN:" - " lun_id=%016llx\n", __func__, ba_lun->lun_id); - return -1ULL; - } - } - - /* Update the free_curr_idx */ - if (bit_pos == HIBIT) - bali->free_curr_idx = bit_word + 1; - else - bali->free_curr_idx = bit_word; - - pr_debug("%s: Allocating AU number=%llx lun_id=%016llx " - "free_aun_cnt=%llx\n", __func__, - ((bit_word * BITS_PER_LONG) + bit_pos), ba_lun->lun_id, - bali->free_aun_cnt); - - return (u64) ((bit_word * BITS_PER_LONG) + bit_pos); -} - -/** - * validate_alloc() - validates the specified block has been allocated - * @bali: LUN info owning the block allocator. - * @aun: Block to validate. - * - * Return: 0 on success, -1 on failure - */ -static int validate_alloc(struct ba_lun_info *bali, u64 aun) -{ - int idx = 0, bit_pos = 0; - - idx = aun / BITS_PER_LONG; - bit_pos = aun % BITS_PER_LONG; - - if (test_bit(bit_pos, (ulong *)&bali->lun_alloc_map[idx])) - return -1; - - return 0; -} - -/** - * ba_free() - frees a block from the block allocator - * @ba_lun: Block allocator from which to allocate a block. - * @to_free: Block to free. - * - * Return: 0 on success, -1 on failure - */ -static int ba_free(struct ba_lun *ba_lun, u64 to_free) -{ - int idx = 0, bit_pos = 0; - struct ba_lun_info *bali = NULL; - - bali = ba_lun->ba_lun_handle; - - if (validate_alloc(bali, to_free)) { - pr_debug("%s: AUN %llx is not allocated on lun_id=%016llx\n", - __func__, to_free, ba_lun->lun_id); - return -1; - } - - pr_debug("%s: Received a request to free AU=%llx lun_id=%016llx " - "free_aun_cnt=%llx\n", __func__, to_free, ba_lun->lun_id, - bali->free_aun_cnt); - - if (bali->aun_clone_map[to_free] > 0) { - pr_debug("%s: AUN %llx lun_id=%016llx cloned. Clone count=%x\n", - __func__, to_free, ba_lun->lun_id, - bali->aun_clone_map[to_free]); - bali->aun_clone_map[to_free]--; - return 0; - } - - idx = to_free / BITS_PER_LONG; - bit_pos = to_free % BITS_PER_LONG; - - set_bit(bit_pos, (ulong *)&bali->lun_alloc_map[idx]); - bali->free_aun_cnt++; - - if (idx < bali->free_low_idx) - bali->free_low_idx = idx; - else if (idx > bali->free_high_idx) - bali->free_high_idx = idx; - - pr_debug("%s: Successfully freed AU bit_pos=%x bit map index=%x " - "lun_id=%016llx free_aun_cnt=%llx\n", __func__, bit_pos, idx, - ba_lun->lun_id, bali->free_aun_cnt); - - return 0; -} - -/** - * ba_clone() - Clone a chunk of the block allocation table - * @ba_lun: Block allocator from which to allocate a block. - * @to_clone: Block to clone. - * - * Return: 0 on success, -1 on failure - */ -static int ba_clone(struct ba_lun *ba_lun, u64 to_clone) -{ - struct ba_lun_info *bali = ba_lun->ba_lun_handle; - - if (validate_alloc(bali, to_clone)) { - pr_debug("%s: AUN=%llx not allocated on lun_id=%016llx\n", - __func__, to_clone, ba_lun->lun_id); - return -1; - } - - pr_debug("%s: Received a request to clone AUN %llx on lun_id=%016llx\n", - __func__, to_clone, ba_lun->lun_id); - - if (bali->aun_clone_map[to_clone] == MAX_AUN_CLONE_CNT) { - pr_debug("%s: AUN %llx on lun_id=%016llx hit max clones already\n", - __func__, to_clone, ba_lun->lun_id); - return -1; - } - - bali->aun_clone_map[to_clone]++; - - return 0; -} - -/** - * ba_space() - returns the amount of free space left in the block allocator - * @ba_lun: Block allocator. - * - * Return: Amount of free space in block allocator - */ -static u64 ba_space(struct ba_lun *ba_lun) -{ - struct ba_lun_info *bali = ba_lun->ba_lun_handle; - - return bali->free_aun_cnt; -} - -/** - * cxlflash_ba_terminate() - frees resources associated with the block allocator - * @ba_lun: Block allocator. - * - * Safe to call in a partially allocated state. - */ -void cxlflash_ba_terminate(struct ba_lun *ba_lun) -{ - struct ba_lun_info *bali = ba_lun->ba_lun_handle; - - if (bali) { - kfree(bali->aun_clone_map); - kfree(bali->lun_alloc_map); - kfree(bali); - ba_lun->ba_lun_handle = NULL; - } -} - -/** - * init_vlun() - initializes a LUN for virtual use - * @lli: LUN information structure that owns the block allocator. - * - * Return: 0 on success, -errno on failure - */ -static int init_vlun(struct llun_info *lli) -{ - int rc = 0; - struct glun_info *gli = lli->parent; - struct blka *blka = &gli->blka; - - memset(blka, 0, sizeof(*blka)); - mutex_init(&blka->mutex); - - /* LUN IDs are unique per port, save the index instead */ - blka->ba_lun.lun_id = lli->lun_index; - blka->ba_lun.lsize = gli->max_lba + 1; - blka->ba_lun.lba_size = gli->blk_len; - - blka->ba_lun.au_size = MC_CHUNK_SIZE; - blka->nchunk = blka->ba_lun.lsize / MC_CHUNK_SIZE; - - rc = ba_init(&blka->ba_lun); - if (unlikely(rc)) - pr_debug("%s: cannot init block_alloc, rc=%d\n", __func__, rc); - - pr_debug("%s: returning rc=%d lli=%p\n", __func__, rc, lli); - return rc; -} - -/** - * write_same16() - sends a SCSI WRITE_SAME16 (0) command to specified LUN - * @sdev: SCSI device associated with LUN. - * @lba: Logical block address to start write same. - * @nblks: Number of logical blocks to write same. - * - * The SCSI WRITE_SAME16 can take quite a while to complete. Should an EEH occur - * while in scsi_execute_cmd(), the EEH handler will attempt to recover. As - * part of the recovery, the handler drains all currently running ioctls, - * waiting until they have completed before proceeding with a reset. As this - * routine is used on the ioctl path, this can create a condition where the - * EEH handler becomes stuck, infinitely waiting for this ioctl thread. To - * avoid this behavior, temporarily unmark this thread as an ioctl thread by - * releasing the ioctl read semaphore. This will allow the EEH handler to - * proceed with a recovery while this thread is still running. Once the - * scsi_execute_cmd() returns, reacquire the ioctl read semaphore and check the - * adapter state in case it changed while inside of scsi_execute_cmd(). The - * state check will wait if the adapter is still being recovered or return a - * failure if the recovery failed. In the event that the adapter reset failed, - * simply return the failure as the ioctl would be unable to continue. - * - * Note that the above puts a requirement on this routine to only be called on - * an ioctl thread. - * - * Return: 0 on success, -errno on failure - */ -static int write_same16(struct scsi_device *sdev, - u64 lba, - u32 nblks) -{ - u8 *cmd_buf = NULL; - u8 *scsi_cmd = NULL; - int rc = 0; - int result = 0; - u64 offset = lba; - int left = nblks; - struct cxlflash_cfg *cfg = shost_priv(sdev->host); - struct device *dev = &cfg->dev->dev; - const u32 s = ilog2(sdev->sector_size) - 9; - const u32 to = sdev->request_queue->rq_timeout; - const u32 ws_limit = - sdev->request_queue->limits.max_write_zeroes_sectors >> s; - - cmd_buf = kzalloc(CMD_BUFSIZE, GFP_KERNEL); - scsi_cmd = kzalloc(MAX_COMMAND_SIZE, GFP_KERNEL); - if (unlikely(!cmd_buf || !scsi_cmd)) { - rc = -ENOMEM; - goto out; - } - - while (left > 0) { - - scsi_cmd[0] = WRITE_SAME_16; - scsi_cmd[1] = cfg->ws_unmap ? 0x8 : 0; - put_unaligned_be64(offset, &scsi_cmd[2]); - put_unaligned_be32(ws_limit < left ? ws_limit : left, - &scsi_cmd[10]); - - /* Drop the ioctl read semaphore across lengthy call */ - up_read(&cfg->ioctl_rwsem); - result = scsi_execute_cmd(sdev, scsi_cmd, REQ_OP_DRV_OUT, - cmd_buf, CMD_BUFSIZE, to, - CMD_RETRIES, NULL); - down_read(&cfg->ioctl_rwsem); - rc = check_state(cfg); - if (rc) { - dev_err(dev, "%s: Failed state result=%08x\n", - __func__, result); - rc = -ENODEV; - goto out; - } - - if (result) { - dev_err_ratelimited(dev, "%s: command failed for " - "offset=%lld result=%08x\n", - __func__, offset, result); - rc = -EIO; - goto out; - } - left -= ws_limit; - offset += ws_limit; - } - -out: - kfree(cmd_buf); - kfree(scsi_cmd); - dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc); - return rc; -} - -/** - * grow_lxt() - expands the translation table associated with the specified RHTE - * @afu: AFU associated with the host. - * @sdev: SCSI device associated with LUN. - * @ctxid: Context ID of context owning the RHTE. - * @rhndl: Resource handle associated with the RHTE. - * @rhte: Resource handle entry (RHTE). - * @new_size: Number of translation entries associated with RHTE. - * - * By design, this routine employs a 'best attempt' allocation and will - * truncate the requested size down if there is not sufficient space in - * the block allocator to satisfy the request but there does exist some - * amount of space. The user is made aware of this by returning the size - * allocated. - * - * Return: 0 on success, -errno on failure - */ -static int grow_lxt(struct afu *afu, - struct scsi_device *sdev, - ctx_hndl_t ctxid, - res_hndl_t rhndl, - struct sisl_rht_entry *rhte, - u64 *new_size) -{ - struct cxlflash_cfg *cfg = shost_priv(sdev->host); - struct device *dev = &cfg->dev->dev; - struct sisl_lxt_entry *lxt = NULL, *lxt_old = NULL; - struct llun_info *lli = sdev->hostdata; - struct glun_info *gli = lli->parent; - struct blka *blka = &gli->blka; - u32 av_size; - u32 ngrps, ngrps_old; - u64 aun; /* chunk# allocated by block allocator */ - u64 delta = *new_size - rhte->lxt_cnt; - u64 my_new_size; - int i, rc = 0; - - /* - * Check what is available in the block allocator before re-allocating - * LXT array. This is done up front under the mutex which must not be - * released until after allocation is complete. - */ - mutex_lock(&blka->mutex); - av_size = ba_space(&blka->ba_lun); - if (unlikely(av_size <= 0)) { - dev_dbg(dev, "%s: ba_space error av_size=%d\n", - __func__, av_size); - mutex_unlock(&blka->mutex); - rc = -ENOSPC; - goto out; - } - - if (av_size < delta) - delta = av_size; - - lxt_old = rhte->lxt_start; - ngrps_old = LXT_NUM_GROUPS(rhte->lxt_cnt); - ngrps = LXT_NUM_GROUPS(rhte->lxt_cnt + delta); - - if (ngrps != ngrps_old) { - /* reallocate to fit new size */ - lxt = kzalloc((sizeof(*lxt) * LXT_GROUP_SIZE * ngrps), - GFP_KERNEL); - if (unlikely(!lxt)) { - mutex_unlock(&blka->mutex); - rc = -ENOMEM; - goto out; - } - - /* copy over all old entries */ - memcpy(lxt, lxt_old, (sizeof(*lxt) * rhte->lxt_cnt)); - } else - lxt = lxt_old; - - /* nothing can fail from now on */ - my_new_size = rhte->lxt_cnt + delta; - - /* add new entries to the end */ - for (i = rhte->lxt_cnt; i < my_new_size; i++) { - /* - * Due to the earlier check of available space, ba_alloc - * cannot fail here. If it did due to internal error, - * leave a rlba_base of -1u which will likely be a - * invalid LUN (too large). - */ - aun = ba_alloc(&blka->ba_lun); - if ((aun == -1ULL) || (aun >= blka->nchunk)) - dev_dbg(dev, "%s: ba_alloc error allocated chunk=%llu " - "max=%llu\n", __func__, aun, blka->nchunk - 1); - - /* select both ports, use r/w perms from RHT */ - lxt[i].rlba_base = ((aun << MC_CHUNK_SHIFT) | - (lli->lun_index << LXT_LUNIDX_SHIFT) | - (RHT_PERM_RW << LXT_PERM_SHIFT | - lli->port_sel)); - } - - mutex_unlock(&blka->mutex); - - /* - * The following sequence is prescribed in the SISlite spec - * for syncing up with the AFU when adding LXT entries. - */ - dma_wmb(); /* Make LXT updates are visible */ - - rhte->lxt_start = lxt; - dma_wmb(); /* Make RHT entry's LXT table update visible */ - - rhte->lxt_cnt = my_new_size; - dma_wmb(); /* Make RHT entry's LXT table size update visible */ - - rc = cxlflash_afu_sync(afu, ctxid, rhndl, AFU_LW_SYNC); - if (unlikely(rc)) - rc = -EAGAIN; - - /* free old lxt if reallocated */ - if (lxt != lxt_old) - kfree(lxt_old); - *new_size = my_new_size; -out: - dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc); - return rc; -} - -/** - * shrink_lxt() - reduces translation table associated with the specified RHTE - * @afu: AFU associated with the host. - * @sdev: SCSI device associated with LUN. - * @rhndl: Resource handle associated with the RHTE. - * @rhte: Resource handle entry (RHTE). - * @ctxi: Context owning resources. - * @new_size: Number of translation entries associated with RHTE. - * - * Return: 0 on success, -errno on failure - */ -static int shrink_lxt(struct afu *afu, - struct scsi_device *sdev, - res_hndl_t rhndl, - struct sisl_rht_entry *rhte, - struct ctx_info *ctxi, - u64 *new_size) -{ - struct cxlflash_cfg *cfg = shost_priv(sdev->host); - struct device *dev = &cfg->dev->dev; - struct sisl_lxt_entry *lxt, *lxt_old; - struct llun_info *lli = sdev->hostdata; - struct glun_info *gli = lli->parent; - struct blka *blka = &gli->blka; - ctx_hndl_t ctxid = DECODE_CTXID(ctxi->ctxid); - bool needs_ws = ctxi->rht_needs_ws[rhndl]; - bool needs_sync = !ctxi->err_recovery_active; - u32 ngrps, ngrps_old; - u64 aun; /* chunk# allocated by block allocator */ - u64 delta = rhte->lxt_cnt - *new_size; - u64 my_new_size; - int i, rc = 0; - - lxt_old = rhte->lxt_start; - ngrps_old = LXT_NUM_GROUPS(rhte->lxt_cnt); - ngrps = LXT_NUM_GROUPS(rhte->lxt_cnt - delta); - - if (ngrps != ngrps_old) { - /* Reallocate to fit new size unless new size is 0 */ - if (ngrps) { - lxt = kzalloc((sizeof(*lxt) * LXT_GROUP_SIZE * ngrps), - GFP_KERNEL); - if (unlikely(!lxt)) { - rc = -ENOMEM; - goto out; - } - - /* Copy over old entries that will remain */ - memcpy(lxt, lxt_old, - (sizeof(*lxt) * (rhte->lxt_cnt - delta))); - } else - lxt = NULL; - } else - lxt = lxt_old; - - /* Nothing can fail from now on */ - my_new_size = rhte->lxt_cnt - delta; - - /* - * The following sequence is prescribed in the SISlite spec - * for syncing up with the AFU when removing LXT entries. - */ - rhte->lxt_cnt = my_new_size; - dma_wmb(); /* Make RHT entry's LXT table size update visible */ - - rhte->lxt_start = lxt; - dma_wmb(); /* Make RHT entry's LXT table update visible */ - - if (needs_sync) { - rc = cxlflash_afu_sync(afu, ctxid, rhndl, AFU_HW_SYNC); - if (unlikely(rc)) - rc = -EAGAIN; - } - - if (needs_ws) { - /* - * Mark the context as unavailable, so that we can release - * the mutex safely. - */ - ctxi->unavail = true; - mutex_unlock(&ctxi->mutex); - } - - /* Free LBAs allocated to freed chunks */ - mutex_lock(&blka->mutex); - for (i = delta - 1; i >= 0; i--) { - aun = lxt_old[my_new_size + i].rlba_base >> MC_CHUNK_SHIFT; - if (needs_ws) - write_same16(sdev, aun, MC_CHUNK_SIZE); - ba_free(&blka->ba_lun, aun); - } - mutex_unlock(&blka->mutex); - - if (needs_ws) { - /* Make the context visible again */ - mutex_lock(&ctxi->mutex); - ctxi->unavail = false; - } - - /* Free old lxt if reallocated */ - if (lxt != lxt_old) - kfree(lxt_old); - *new_size = my_new_size; -out: - dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc); - return rc; -} - -/** - * _cxlflash_vlun_resize() - changes the size of a virtual LUN - * @sdev: SCSI device associated with LUN owning virtual LUN. - * @ctxi: Context owning resources. - * @resize: Resize ioctl data structure. - * - * On successful return, the user is informed of the new size (in blocks) - * of the virtual LUN in last LBA format. When the size of the virtual - * LUN is zero, the last LBA is reflected as -1. See comment in the - * prologue for _cxlflash_disk_release() regarding AFU syncs and contexts - * on the error recovery list. - * - * Return: 0 on success, -errno on failure - */ -int _cxlflash_vlun_resize(struct scsi_device *sdev, - struct ctx_info *ctxi, - struct dk_cxlflash_resize *resize) -{ - struct cxlflash_cfg *cfg = shost_priv(sdev->host); - struct device *dev = &cfg->dev->dev; - struct llun_info *lli = sdev->hostdata; - struct glun_info *gli = lli->parent; - struct afu *afu = cfg->afu; - bool put_ctx = false; - - res_hndl_t rhndl = resize->rsrc_handle; - u64 new_size; - u64 nsectors; - u64 ctxid = DECODE_CTXID(resize->context_id), - rctxid = resize->context_id; - - struct sisl_rht_entry *rhte; - - int rc = 0; - - /* - * The requested size (req_size) is always assumed to be in 4k blocks, - * so we have to convert it here from 4k to chunk size. - */ - nsectors = (resize->req_size * CXLFLASH_BLOCK_SIZE) / gli->blk_len; - new_size = DIV_ROUND_UP(nsectors, MC_CHUNK_SIZE); - - dev_dbg(dev, "%s: ctxid=%llu rhndl=%llu req_size=%llu new_size=%llu\n", - __func__, ctxid, resize->rsrc_handle, resize->req_size, - new_size); - - if (unlikely(gli->mode != MODE_VIRTUAL)) { - dev_dbg(dev, "%s: LUN mode does not support resize mode=%d\n", - __func__, gli->mode); - rc = -EINVAL; - goto out; - - } - - if (!ctxi) { - ctxi = get_context(cfg, rctxid, lli, CTX_CTRL_ERR_FALLBACK); - if (unlikely(!ctxi)) { - dev_dbg(dev, "%s: Bad context ctxid=%llu\n", - __func__, ctxid); - rc = -EINVAL; - goto out; - } - - put_ctx = true; - } - - rhte = get_rhte(ctxi, rhndl, lli); - if (unlikely(!rhte)) { - dev_dbg(dev, "%s: Bad resource handle rhndl=%u\n", - __func__, rhndl); - rc = -EINVAL; - goto out; - } - - if (new_size > rhte->lxt_cnt) - rc = grow_lxt(afu, sdev, ctxid, rhndl, rhte, &new_size); - else if (new_size < rhte->lxt_cnt) - rc = shrink_lxt(afu, sdev, rhndl, rhte, ctxi, &new_size); - else { - /* - * Rare case where there is already sufficient space, just - * need to perform a translation sync with the AFU. This - * scenario likely follows a previous sync failure during - * a resize operation. Accordingly, perform the heavyweight - * form of translation sync as it is unknown which type of - * resize failed previously. - */ - rc = cxlflash_afu_sync(afu, ctxid, rhndl, AFU_HW_SYNC); - if (unlikely(rc)) { - rc = -EAGAIN; - goto out; - } - } - - resize->hdr.return_flags = 0; - resize->last_lba = (new_size * MC_CHUNK_SIZE * gli->blk_len); - resize->last_lba /= CXLFLASH_BLOCK_SIZE; - resize->last_lba--; - -out: - if (put_ctx) - put_context(ctxi); - dev_dbg(dev, "%s: resized to %llu returning rc=%d\n", - __func__, resize->last_lba, rc); - return rc; -} - -int cxlflash_vlun_resize(struct scsi_device *sdev, void *resize) -{ - return _cxlflash_vlun_resize(sdev, NULL, resize); -} - -/** - * cxlflash_restore_luntable() - Restore LUN table to prior state - * @cfg: Internal structure associated with the host. - */ -void cxlflash_restore_luntable(struct cxlflash_cfg *cfg) -{ - struct llun_info *lli, *temp; - u32 lind; - int k; - struct device *dev = &cfg->dev->dev; - __be64 __iomem *fc_port_luns; - - mutex_lock(&global.mutex); - - list_for_each_entry_safe(lli, temp, &cfg->lluns, list) { - if (!lli->in_table) - continue; - - lind = lli->lun_index; - dev_dbg(dev, "%s: Virtual LUNs on slot %d:\n", __func__, lind); - - for (k = 0; k < cfg->num_fc_ports; k++) - if (lli->port_sel & (1 << k)) { - fc_port_luns = get_fc_port_luns(cfg, k); - writeq_be(lli->lun_id[k], &fc_port_luns[lind]); - dev_dbg(dev, "\t%d=%llx\n", k, lli->lun_id[k]); - } - } - - mutex_unlock(&global.mutex); -} - -/** - * get_num_ports() - compute number of ports from port selection mask - * @psm: Port selection mask. - * - * Return: Population count of port selection mask - */ -static inline u8 get_num_ports(u32 psm) -{ - static const u8 bits[16] = { 0, 1, 1, 2, 1, 2, 2, 3, - 1, 2, 2, 3, 2, 3, 3, 4 }; - - return bits[psm & 0xf]; -} - -/** - * init_luntable() - write an entry in the LUN table - * @cfg: Internal structure associated with the host. - * @lli: Per adapter LUN information structure. - * - * On successful return, a LUN table entry is created: - * - at the top for LUNs visible on multiple ports. - * - at the bottom for LUNs visible only on one port. - * - * Return: 0 on success, -errno on failure - */ -static int init_luntable(struct cxlflash_cfg *cfg, struct llun_info *lli) -{ - u32 chan; - u32 lind; - u32 nports; - int rc = 0; - int k; - struct device *dev = &cfg->dev->dev; - __be64 __iomem *fc_port_luns; - - mutex_lock(&global.mutex); - - if (lli->in_table) - goto out; - - nports = get_num_ports(lli->port_sel); - if (nports == 0 || nports > cfg->num_fc_ports) { - WARN(1, "Unsupported port configuration nports=%u", nports); - rc = -EIO; - goto out; - } - - if (nports > 1) { - /* - * When LUN is visible from multiple ports, we will put - * it in the top half of the LUN table. - */ - for (k = 0; k < cfg->num_fc_ports; k++) { - if (!(lli->port_sel & (1 << k))) - continue; - - if (cfg->promote_lun_index == cfg->last_lun_index[k]) { - rc = -ENOSPC; - goto out; - } - } - - lind = lli->lun_index = cfg->promote_lun_index; - dev_dbg(dev, "%s: Virtual LUNs on slot %d:\n", __func__, lind); - - for (k = 0; k < cfg->num_fc_ports; k++) { - if (!(lli->port_sel & (1 << k))) - continue; - - fc_port_luns = get_fc_port_luns(cfg, k); - writeq_be(lli->lun_id[k], &fc_port_luns[lind]); - dev_dbg(dev, "\t%d=%llx\n", k, lli->lun_id[k]); - } - - cfg->promote_lun_index++; - } else { - /* - * When LUN is visible only from one port, we will put - * it in the bottom half of the LUN table. - */ - chan = PORTMASK2CHAN(lli->port_sel); - if (cfg->promote_lun_index == cfg->last_lun_index[chan]) { - rc = -ENOSPC; - goto out; - } - - lind = lli->lun_index = cfg->last_lun_index[chan]; - fc_port_luns = get_fc_port_luns(cfg, chan); - writeq_be(lli->lun_id[chan], &fc_port_luns[lind]); - cfg->last_lun_index[chan]--; - dev_dbg(dev, "%s: Virtual LUNs on slot %d:\n\t%d=%llx\n", - __func__, lind, chan, lli->lun_id[chan]); - } - - lli->in_table = true; -out: - mutex_unlock(&global.mutex); - dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc); - return rc; -} - -/** - * cxlflash_disk_virtual_open() - open a virtual disk of specified size - * @sdev: SCSI device associated with LUN owning virtual LUN. - * @arg: UVirtual ioctl data structure. - * - * On successful return, the user is informed of the resource handle - * to be used to identify the virtual LUN and the size (in blocks) of - * the virtual LUN in last LBA format. When the size of the virtual LUN - * is zero, the last LBA is reflected as -1. - * - * Return: 0 on success, -errno on failure - */ -int cxlflash_disk_virtual_open(struct scsi_device *sdev, void *arg) -{ - struct cxlflash_cfg *cfg = shost_priv(sdev->host); - struct device *dev = &cfg->dev->dev; - struct llun_info *lli = sdev->hostdata; - struct glun_info *gli = lli->parent; - - struct dk_cxlflash_uvirtual *virt = (struct dk_cxlflash_uvirtual *)arg; - struct dk_cxlflash_resize resize; - - u64 ctxid = DECODE_CTXID(virt->context_id), - rctxid = virt->context_id; - u64 lun_size = virt->lun_size; - u64 last_lba = 0; - u64 rsrc_handle = -1; - - int rc = 0; - - struct ctx_info *ctxi = NULL; - struct sisl_rht_entry *rhte = NULL; - - dev_dbg(dev, "%s: ctxid=%llu ls=%llu\n", __func__, ctxid, lun_size); - - /* Setup the LUNs block allocator on first call */ - mutex_lock(&gli->mutex); - if (gli->mode == MODE_NONE) { - rc = init_vlun(lli); - if (rc) { - dev_err(dev, "%s: init_vlun failed rc=%d\n", - __func__, rc); - rc = -ENOMEM; - goto err0; - } - } - - rc = cxlflash_lun_attach(gli, MODE_VIRTUAL, true); - if (unlikely(rc)) { - dev_err(dev, "%s: Failed attach to LUN (VIRTUAL)\n", __func__); - goto err0; - } - mutex_unlock(&gli->mutex); - - rc = init_luntable(cfg, lli); - if (rc) { - dev_err(dev, "%s: init_luntable failed rc=%d\n", __func__, rc); - goto err1; - } - - ctxi = get_context(cfg, rctxid, lli, 0); - if (unlikely(!ctxi)) { - dev_err(dev, "%s: Bad context ctxid=%llu\n", __func__, ctxid); - rc = -EINVAL; - goto err1; - } - - rhte = rhte_checkout(ctxi, lli); - if (unlikely(!rhte)) { - dev_err(dev, "%s: too many opens ctxid=%llu\n", - __func__, ctxid); - rc = -EMFILE; /* too many opens */ - goto err1; - } - - rsrc_handle = (rhte - ctxi->rht_start); - - /* Populate RHT format 0 */ - rhte->nmask = MC_RHT_NMASK; - rhte->fp = SISL_RHT_FP(0U, ctxi->rht_perms); - - /* Resize even if requested size is 0 */ - marshal_virt_to_resize(virt, &resize); - resize.rsrc_handle = rsrc_handle; - rc = _cxlflash_vlun_resize(sdev, ctxi, &resize); - if (rc) { - dev_err(dev, "%s: resize failed rc=%d\n", __func__, rc); - goto err2; - } - last_lba = resize.last_lba; - - if (virt->hdr.flags & DK_CXLFLASH_UVIRTUAL_NEED_WRITE_SAME) - ctxi->rht_needs_ws[rsrc_handle] = true; - - virt->hdr.return_flags = 0; - virt->last_lba = last_lba; - virt->rsrc_handle = rsrc_handle; - - if (get_num_ports(lli->port_sel) > 1) - virt->hdr.return_flags |= DK_CXLFLASH_ALL_PORTS_ACTIVE; -out: - if (likely(ctxi)) - put_context(ctxi); - dev_dbg(dev, "%s: returning handle=%llu rc=%d llba=%llu\n", - __func__, rsrc_handle, rc, last_lba); - return rc; - -err2: - rhte_checkin(ctxi, rhte); -err1: - cxlflash_lun_detach(gli); - goto out; -err0: - /* Special common cleanup prior to successful LUN attach */ - cxlflash_ba_terminate(&gli->blka.ba_lun); - mutex_unlock(&gli->mutex); - goto out; -} - -/** - * clone_lxt() - copies translation tables from source to destination RHTE - * @afu: AFU associated with the host. - * @blka: Block allocator associated with LUN. - * @ctxid: Context ID of context owning the RHTE. - * @rhndl: Resource handle associated with the RHTE. - * @rhte: Destination resource handle entry (RHTE). - * @rhte_src: Source resource handle entry (RHTE). - * - * Return: 0 on success, -errno on failure - */ -static int clone_lxt(struct afu *afu, - struct blka *blka, - ctx_hndl_t ctxid, - res_hndl_t rhndl, - struct sisl_rht_entry *rhte, - struct sisl_rht_entry *rhte_src) -{ - struct cxlflash_cfg *cfg = afu->parent; - struct device *dev = &cfg->dev->dev; - struct sisl_lxt_entry *lxt = NULL; - bool locked = false; - u32 ngrps; - u64 aun; /* chunk# allocated by block allocator */ - int j; - int i = 0; - int rc = 0; - - ngrps = LXT_NUM_GROUPS(rhte_src->lxt_cnt); - - if (ngrps) { - /* allocate new LXTs for clone */ - lxt = kzalloc((sizeof(*lxt) * LXT_GROUP_SIZE * ngrps), - GFP_KERNEL); - if (unlikely(!lxt)) { - rc = -ENOMEM; - goto out; - } - - /* copy over */ - memcpy(lxt, rhte_src->lxt_start, - (sizeof(*lxt) * rhte_src->lxt_cnt)); - - /* clone the LBAs in block allocator via ref_cnt, note that the - * block allocator mutex must be held until it is established - * that this routine will complete without the need for a - * cleanup. - */ - mutex_lock(&blka->mutex); - locked = true; - for (i = 0; i < rhte_src->lxt_cnt; i++) { - aun = (lxt[i].rlba_base >> MC_CHUNK_SHIFT); - if (ba_clone(&blka->ba_lun, aun) == -1ULL) { - rc = -EIO; - goto err; - } - } - } - - /* - * The following sequence is prescribed in the SISlite spec - * for syncing up with the AFU when adding LXT entries. - */ - dma_wmb(); /* Make LXT updates are visible */ - - rhte->lxt_start = lxt; - dma_wmb(); /* Make RHT entry's LXT table update visible */ - - rhte->lxt_cnt = rhte_src->lxt_cnt; - dma_wmb(); /* Make RHT entry's LXT table size update visible */ - - rc = cxlflash_afu_sync(afu, ctxid, rhndl, AFU_LW_SYNC); - if (unlikely(rc)) { - rc = -EAGAIN; - goto err2; - } - -out: - if (locked) - mutex_unlock(&blka->mutex); - dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc); - return rc; -err2: - /* Reset the RHTE */ - rhte->lxt_cnt = 0; - dma_wmb(); - rhte->lxt_start = NULL; - dma_wmb(); -err: - /* free the clones already made */ - for (j = 0; j < i; j++) { - aun = (lxt[j].rlba_base >> MC_CHUNK_SHIFT); - ba_free(&blka->ba_lun, aun); - } - kfree(lxt); - goto out; -} - -/** - * cxlflash_disk_clone() - clone a context by making snapshot of another - * @sdev: SCSI device associated with LUN owning virtual LUN. - * @arg: Clone ioctl data structure. - * - * This routine effectively performs cxlflash_disk_open operation for each - * in-use virtual resource in the source context. Note that the destination - * context must be in pristine state and cannot have any resource handles - * open at the time of the clone. - * - * Return: 0 on success, -errno on failure - */ -int cxlflash_disk_clone(struct scsi_device *sdev, void *arg) -{ - struct dk_cxlflash_clone *clone = arg; - struct cxlflash_cfg *cfg = shost_priv(sdev->host); - struct device *dev = &cfg->dev->dev; - struct llun_info *lli = sdev->hostdata; - struct glun_info *gli = lli->parent; - struct blka *blka = &gli->blka; - struct afu *afu = cfg->afu; - struct dk_cxlflash_release release = { { 0 }, 0 }; - - struct ctx_info *ctxi_src = NULL, - *ctxi_dst = NULL; - struct lun_access *lun_access_src, *lun_access_dst; - u32 perms; - u64 ctxid_src = DECODE_CTXID(clone->context_id_src), - ctxid_dst = DECODE_CTXID(clone->context_id_dst), - rctxid_src = clone->context_id_src, - rctxid_dst = clone->context_id_dst; - int i, j; - int rc = 0; - bool found; - LIST_HEAD(sidecar); - - dev_dbg(dev, "%s: ctxid_src=%llu ctxid_dst=%llu\n", - __func__, ctxid_src, ctxid_dst); - - /* Do not clone yourself */ - if (unlikely(rctxid_src == rctxid_dst)) { - rc = -EINVAL; - goto out; - } - - if (unlikely(gli->mode != MODE_VIRTUAL)) { - rc = -EINVAL; - dev_dbg(dev, "%s: Only supported on virtual LUNs mode=%u\n", - __func__, gli->mode); - goto out; - } - - ctxi_src = get_context(cfg, rctxid_src, lli, CTX_CTRL_CLONE); - ctxi_dst = get_context(cfg, rctxid_dst, lli, 0); - if (unlikely(!ctxi_src || !ctxi_dst)) { - dev_dbg(dev, "%s: Bad context ctxid_src=%llu ctxid_dst=%llu\n", - __func__, ctxid_src, ctxid_dst); - rc = -EINVAL; - goto out; - } - - /* Verify there is no open resource handle in the destination context */ - for (i = 0; i < MAX_RHT_PER_CONTEXT; i++) - if (ctxi_dst->rht_start[i].nmask != 0) { - rc = -EINVAL; - goto out; - } - - /* Clone LUN access list */ - list_for_each_entry(lun_access_src, &ctxi_src->luns, list) { - found = false; - list_for_each_entry(lun_access_dst, &ctxi_dst->luns, list) - if (lun_access_dst->sdev == lun_access_src->sdev) { - found = true; - break; - } - - if (!found) { - lun_access_dst = kzalloc(sizeof(*lun_access_dst), - GFP_KERNEL); - if (unlikely(!lun_access_dst)) { - dev_err(dev, "%s: lun_access allocation fail\n", - __func__); - rc = -ENOMEM; - goto out; - } - - *lun_access_dst = *lun_access_src; - list_add(&lun_access_dst->list, &sidecar); - } - } - - if (unlikely(!ctxi_src->rht_out)) { - dev_dbg(dev, "%s: Nothing to clone\n", __func__); - goto out_success; - } - - /* User specified permission on attach */ - perms = ctxi_dst->rht_perms; - - /* - * Copy over checked-out RHT (and their associated LXT) entries by - * hand, stopping after we've copied all outstanding entries and - * cleaning up if the clone fails. - * - * Note: This loop is equivalent to performing cxlflash_disk_open and - * cxlflash_vlun_resize. As such, LUN accounting needs to be taken into - * account by attaching after each successful RHT entry clone. In the - * event that a clone failure is experienced, the LUN detach is handled - * via the cleanup performed by _cxlflash_disk_release. - */ - for (i = 0; i < MAX_RHT_PER_CONTEXT; i++) { - if (ctxi_src->rht_out == ctxi_dst->rht_out) - break; - if (ctxi_src->rht_start[i].nmask == 0) - continue; - - /* Consume a destination RHT entry */ - ctxi_dst->rht_out++; - ctxi_dst->rht_start[i].nmask = ctxi_src->rht_start[i].nmask; - ctxi_dst->rht_start[i].fp = - SISL_RHT_FP_CLONE(ctxi_src->rht_start[i].fp, perms); - ctxi_dst->rht_lun[i] = ctxi_src->rht_lun[i]; - - rc = clone_lxt(afu, blka, ctxid_dst, i, - &ctxi_dst->rht_start[i], - &ctxi_src->rht_start[i]); - if (rc) { - marshal_clone_to_rele(clone, &release); - for (j = 0; j < i; j++) { - release.rsrc_handle = j; - _cxlflash_disk_release(sdev, ctxi_dst, - &release); - } - - /* Put back the one we failed on */ - rhte_checkin(ctxi_dst, &ctxi_dst->rht_start[i]); - goto err; - } - - cxlflash_lun_attach(gli, gli->mode, false); - } - -out_success: - list_splice(&sidecar, &ctxi_dst->luns); - - /* fall through */ -out: - if (ctxi_src) - put_context(ctxi_src); - if (ctxi_dst) - put_context(ctxi_dst); - dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc); - return rc; - -err: - list_for_each_entry_safe(lun_access_src, lun_access_dst, &sidecar, list) - kfree(lun_access_src); - goto out; -} diff --git a/drivers/scsi/cxlflash/vlun.h b/drivers/scsi/cxlflash/vlun.h deleted file mode 100644 index 68e3ea52fe80..000000000000 --- a/drivers/scsi/cxlflash/vlun.h +++ /dev/null @@ -1,82 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * CXL Flash Device Driver - * - * Written by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>, IBM Corporation - * Matthew R. Ochs <mrochs@linux.vnet.ibm.com>, IBM Corporation - * - * Copyright (C) 2015 IBM Corporation - */ - -#ifndef _CXLFLASH_VLUN_H -#define _CXLFLASH_VLUN_H - -/* RHT - Resource Handle Table */ -#define MC_RHT_NMASK 16 /* in bits */ -#define MC_CHUNK_SHIFT MC_RHT_NMASK /* shift to go from LBA to chunk# */ - -#define HIBIT (BITS_PER_LONG - 1) - -#define MAX_AUN_CLONE_CNT 0xFF - -/* - * LXT - LBA Translation Table - * - * +-------+-------+-------+-------+-------+-------+-------+---+---+ - * | RLBA_BASE |LUN_IDX| P |SEL| - * +-------+-------+-------+-------+-------+-------+-------+---+---+ - * - * The LXT Entry contains the physical LBA where the chunk starts (RLBA_BASE). - * AFU ORes the low order bits from the virtual LBA (offset into the chunk) - * with RLBA_BASE. The result is the physical LBA to be sent to storage. - * The LXT Entry also contains an index to a LUN TBL and a bitmask of which - * outgoing (FC) * ports can be selected. The port select bit-mask is ANDed - * with a global port select bit-mask maintained by the driver. - * In addition, it has permission bits that are ANDed with the - * RHT permissions to arrive at the final permissions for the chunk. - * - * LXT tables are allocated dynamically in groups. This is done to avoid - * a malloc/free overhead each time the LXT has to grow or shrink. - * - * Based on the current lxt_cnt (used), it is always possible to know - * how many are allocated (used+free). The number of allocated entries is - * not stored anywhere. - * - * The LXT table is re-allocated whenever it needs to cross into another group. - */ -#define LXT_GROUP_SIZE 8 -#define LXT_NUM_GROUPS(lxt_cnt) (((lxt_cnt) + 7)/8) /* alloc'ed groups */ -#define LXT_LUNIDX_SHIFT 8 /* LXT entry, shift for LUN index */ -#define LXT_PERM_SHIFT 4 /* LXT entry, shift for permission bits */ - -struct ba_lun_info { - u64 *lun_alloc_map; - u32 lun_bmap_size; - u32 total_aus; - u64 free_aun_cnt; - - /* indices to be used for elevator lookup of free map */ - u32 free_low_idx; - u32 free_curr_idx; - u32 free_high_idx; - - u8 *aun_clone_map; -}; - -struct ba_lun { - u64 lun_id; - u64 wwpn; - size_t lsize; /* LUN size in number of LBAs */ - size_t lba_size; /* LBA size in number of bytes */ - size_t au_size; /* Allocation Unit size in number of LBAs */ - struct ba_lun_info *ba_lun_handle; -}; - -/* Block Allocator */ -struct blka { - struct ba_lun ba_lun; - u64 nchunk; /* number of chunks */ - struct mutex mutex; -}; - -#endif /* ifndef _CXLFLASH_SUPERPIPE_H */ diff --git a/drivers/scsi/elx/efct/efct_driver.c b/drivers/scsi/elx/efct/efct_driver.c index 8469c156ab33..59f277593785 100644 --- a/drivers/scsi/elx/efct/efct_driver.c +++ b/drivers/scsi/elx/efct/efct_driver.c @@ -735,7 +735,7 @@ efct_pci_io_resume(struct pci_dev *pdev) MODULE_DEVICE_TABLE(pci, efct_pci_table); -static struct pci_error_handlers efct_pci_err_handler = { +static const struct pci_error_handlers efct_pci_err_handler = { .error_detected = efct_pci_io_error_detected, .slot_reset = efct_pci_io_slot_reset, .resume = efct_pci_io_resume, diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 84d8de07b7ae..c73a71ac3c29 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -453,17 +453,13 @@ static ssize_t host_store_hp_ssd_smart_path_status(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - int status, len; + int status; struct ctlr_info *h; struct Scsi_Host *shost = class_to_shost(dev); - char tmpbuf[10]; if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) return -EACCES; - len = count > sizeof(tmpbuf) - 1 ? sizeof(tmpbuf) - 1 : count; - strncpy(tmpbuf, buf, len); - tmpbuf[len] = '\0'; - if (sscanf(tmpbuf, "%d", &status) != 1) + if (kstrtoint(buf, 10, &status)) return -EINVAL; h = shost_to_hba(shost); h->acciopath_status = !!status; @@ -477,17 +473,13 @@ static ssize_t host_store_raid_offload_debug(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - int debug_level, len; + int debug_level; struct ctlr_info *h; struct Scsi_Host *shost = class_to_shost(dev); - char tmpbuf[10]; if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) return -EACCES; - len = count > sizeof(tmpbuf) - 1 ? sizeof(tmpbuf) - 1 : count; - strncpy(tmpbuf, buf, len); - tmpbuf[len] = '\0'; - if (sscanf(tmpbuf, "%d", &debug_level) != 1) + if (kstrtoint(buf, 10, &debug_level)) return -EINVAL; if (debug_level < 0) debug_level = 0; @@ -7238,8 +7230,7 @@ static int hpsa_controller_hard_reset(struct pci_dev *pdev, static void init_driver_version(char *driver_version, int len) { - memset(driver_version, 0, len); - strncpy(driver_version, HPSA " " HPSA_DRIVER_VERSION, len - 1); + strscpy_pad(driver_version, HPSA " " HPSA_DRIVER_VERSION, len); } static int write_driver_ver_to_cfgtable(struct CfgTable __iomem *cfgtable) diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c index cce6c6b409ad..94adb6ac02a4 100644 --- a/drivers/scsi/ips.c +++ b/drivers/scsi/ips.c @@ -3631,8 +3631,8 @@ ips_send_cmd(ips_ha_t * ha, ips_scb_t * scb) break; - case RESERVE: - case RELEASE: + case RESERVE_6: + case RELEASE_6: scb->scsi_cmd->result = DID_OK << 16; break; @@ -3899,8 +3899,8 @@ ips_chkstatus(ips_ha_t * ha, IPS_STATUS * pstatus) case WRITE_6: case READ_10: case WRITE_10: - case RESERVE: - case RELEASE: + case RESERVE_6: + case RELEASE_6: break; case MODE_SENSE: diff --git a/drivers/scsi/isci/remote_device.h b/drivers/scsi/isci/remote_device.h index 27ae45332704..561ae3f2cbbd 100644 --- a/drivers/scsi/isci/remote_device.h +++ b/drivers/scsi/isci/remote_device.h @@ -198,7 +198,7 @@ enum sci_status sci_remote_device_reset( * device. When there are no active IO for the device it is is in this * state. * - * @SCI_STP_DEV_CMD: This is the command state for for the STP remote + * @SCI_STP_DEV_CMD: This is the command state for the STP remote * device. This state is entered when the device is processing a * non-NCQ command. The device object will fail any new start IO * requests until this command is complete. diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index e81f60985193..7b4fe0e6afb2 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -17,7 +17,6 @@ * Zhenyu Wang */ -#include <crypto/hash.h> #include <linux/types.h> #include <linux/inet.h> #include <linux/slab.h> @@ -468,8 +467,7 @@ static void iscsi_sw_tcp_send_hdr_prep(struct iscsi_conn *conn, void *hdr, * sufficient room. */ if (conn->hdrdgst_en) { - iscsi_tcp_dgst_header(tcp_sw_conn->tx_hash, hdr, hdrlen, - hdr + hdrlen); + iscsi_tcp_dgst_header(hdr, hdrlen, hdr + hdrlen); hdrlen += ISCSI_DIGEST_SIZE; } @@ -494,7 +492,7 @@ iscsi_sw_tcp_send_data_prep(struct iscsi_conn *conn, struct scatterlist *sg, { struct iscsi_tcp_conn *tcp_conn = conn->dd_data; struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; - struct ahash_request *tx_hash = NULL; + u32 *tx_crcp = NULL; unsigned int hdr_spec_len; ISCSI_SW_TCP_DBG(conn, "offset=%d, datalen=%d %s\n", offset, len, @@ -507,11 +505,10 @@ iscsi_sw_tcp_send_data_prep(struct iscsi_conn *conn, struct scatterlist *sg, WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len)); if (conn->datadgst_en) - tx_hash = tcp_sw_conn->tx_hash; + tx_crcp = &tcp_sw_conn->tx_crc; return iscsi_segment_seek_sg(&tcp_sw_conn->out.data_segment, - sg, count, offset, len, - NULL, tx_hash); + sg, count, offset, len, NULL, tx_crcp); } static void @@ -520,7 +517,7 @@ iscsi_sw_tcp_send_linear_data_prep(struct iscsi_conn *conn, void *data, { struct iscsi_tcp_conn *tcp_conn = conn->dd_data; struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; - struct ahash_request *tx_hash = NULL; + u32 *tx_crcp = NULL; unsigned int hdr_spec_len; ISCSI_SW_TCP_DBG(conn, "datalen=%zd %s\n", len, conn->datadgst_en ? @@ -532,10 +529,10 @@ iscsi_sw_tcp_send_linear_data_prep(struct iscsi_conn *conn, void *data, WARN_ON(iscsi_padded(len) != iscsi_padded(hdr_spec_len)); if (conn->datadgst_en) - tx_hash = tcp_sw_conn->tx_hash; + tx_crcp = &tcp_sw_conn->tx_crc; iscsi_segment_init_linear(&tcp_sw_conn->out.data_segment, - data, len, NULL, tx_hash); + data, len, NULL, tx_crcp); } static int iscsi_sw_tcp_pdu_init(struct iscsi_task *task, @@ -583,7 +580,6 @@ iscsi_sw_tcp_conn_create(struct iscsi_cls_session *cls_session, struct iscsi_cls_conn *cls_conn; struct iscsi_tcp_conn *tcp_conn; struct iscsi_sw_tcp_conn *tcp_sw_conn; - struct crypto_ahash *tfm; cls_conn = iscsi_tcp_conn_setup(cls_session, sizeof(*tcp_sw_conn), conn_idx); @@ -596,37 +592,9 @@ iscsi_sw_tcp_conn_create(struct iscsi_cls_session *cls_session, tcp_sw_conn->queue_recv = iscsi_recv_from_iscsi_q; mutex_init(&tcp_sw_conn->sock_lock); - - tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) - goto free_conn; - - tcp_sw_conn->tx_hash = ahash_request_alloc(tfm, GFP_KERNEL); - if (!tcp_sw_conn->tx_hash) - goto free_tfm; - ahash_request_set_callback(tcp_sw_conn->tx_hash, 0, NULL, NULL); - - tcp_sw_conn->rx_hash = ahash_request_alloc(tfm, GFP_KERNEL); - if (!tcp_sw_conn->rx_hash) - goto free_tx_hash; - ahash_request_set_callback(tcp_sw_conn->rx_hash, 0, NULL, NULL); - - tcp_conn->rx_hash = tcp_sw_conn->rx_hash; + tcp_conn->rx_crcp = &tcp_sw_conn->rx_crc; return cls_conn; - -free_tx_hash: - ahash_request_free(tcp_sw_conn->tx_hash); -free_tfm: - crypto_free_ahash(tfm); -free_conn: - iscsi_conn_printk(KERN_ERR, conn, - "Could not create connection due to crc32c " - "loading error. Make sure the crc32c " - "module is built as a module or into the " - "kernel\n"); - iscsi_tcp_conn_teardown(cls_conn); - return NULL; } static void iscsi_sw_tcp_release_conn(struct iscsi_conn *conn) @@ -664,20 +632,8 @@ static void iscsi_sw_tcp_release_conn(struct iscsi_conn *conn) static void iscsi_sw_tcp_conn_destroy(struct iscsi_cls_conn *cls_conn) { struct iscsi_conn *conn = cls_conn->dd_data; - struct iscsi_tcp_conn *tcp_conn = conn->dd_data; - struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; iscsi_sw_tcp_release_conn(conn); - - ahash_request_free(tcp_sw_conn->rx_hash); - if (tcp_sw_conn->tx_hash) { - struct crypto_ahash *tfm; - - tfm = crypto_ahash_reqtfm(tcp_sw_conn->tx_hash); - ahash_request_free(tcp_sw_conn->tx_hash); - crypto_free_ahash(tfm); - } - iscsi_tcp_conn_teardown(cls_conn); } diff --git a/drivers/scsi/iscsi_tcp.h b/drivers/scsi/iscsi_tcp.h index 89a6fc552f0b..c3e5d9fa6add 100644 --- a/drivers/scsi/iscsi_tcp.h +++ b/drivers/scsi/iscsi_tcp.h @@ -41,8 +41,8 @@ struct iscsi_sw_tcp_conn { void (*old_write_space)(struct sock *); /* data and header digests */ - struct ahash_request *tx_hash; /* CRC32C (Tx) */ - struct ahash_request *rx_hash; /* CRC32C (Rx) */ + u32 tx_crc; /* CRC32C (Tx) */ + u32 rx_crc; /* CRC32C (Rx) */ /* MIB custom statistics */ uint32_t sendpage_failures_cnt; diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c index c182aa83f2c9..e90805ba868f 100644 --- a/drivers/scsi/libiscsi_tcp.c +++ b/drivers/scsi/libiscsi_tcp.c @@ -15,7 +15,7 @@ * Zhenyu Wang */ -#include <crypto/hash.h> +#include <linux/crc32c.h> #include <linux/types.h> #include <linux/list.h> #include <linux/inet.h> @@ -168,7 +168,7 @@ iscsi_tcp_segment_splice_digest(struct iscsi_segment *segment, void *digest) segment->size = ISCSI_DIGEST_SIZE; segment->copied = 0; segment->sg = NULL; - segment->hash = NULL; + segment->crcp = NULL; } /** @@ -191,29 +191,27 @@ int iscsi_tcp_segment_done(struct iscsi_tcp_conn *tcp_conn, struct iscsi_segment *segment, int recv, unsigned copied) { - struct scatterlist sg; unsigned int pad; ISCSI_DBG_TCP(tcp_conn->iscsi_conn, "copied %u %u size %u %s\n", segment->copied, copied, segment->size, recv ? "recv" : "xmit"); - if (segment->hash && copied) { - /* - * If a segment is kmapd we must unmap it before sending - * to the crypto layer since that will try to kmap it again. - */ - iscsi_tcp_segment_unmap(segment); - - if (!segment->data) { - sg_init_table(&sg, 1); - sg_set_page(&sg, sg_page(segment->sg), copied, - segment->copied + segment->sg_offset + - segment->sg->offset); - } else - sg_init_one(&sg, segment->data + segment->copied, - copied); - ahash_request_set_crypt(segment->hash, &sg, NULL, copied); - crypto_ahash_update(segment->hash); + if (segment->crcp && copied) { + if (segment->data) { + *segment->crcp = crc32c(*segment->crcp, + segment->data + segment->copied, + copied); + } else { + const void *data; + + data = kmap_local_page(sg_page(segment->sg)); + *segment->crcp = crc32c(*segment->crcp, + data + segment->copied + + segment->sg_offset + + segment->sg->offset, + copied); + kunmap_local(data); + } } segment->copied += copied; @@ -258,10 +256,8 @@ int iscsi_tcp_segment_done(struct iscsi_tcp_conn *tcp_conn, * Set us up for transferring the data digest. hdr digest * is completely handled in hdr done function. */ - if (segment->hash) { - ahash_request_set_crypt(segment->hash, NULL, - segment->digest, 0); - crypto_ahash_final(segment->hash); + if (segment->crcp) { + put_unaligned_le32(~*segment->crcp, segment->digest); iscsi_tcp_segment_splice_digest(segment, recv ? segment->recv_digest : segment->digest); return 0; @@ -282,8 +278,7 @@ EXPORT_SYMBOL_GPL(iscsi_tcp_segment_done); * given buffer, and returns the number of bytes * consumed, which can actually be less than @len. * - * If hash digest is enabled, the function will update the - * hash while copying. + * If CRC is enabled, the function will update the CRC while copying. * Combining these two operations doesn't buy us a lot (yet), * but in the future we could implement combined copy+crc, * just way we do for network layer checksums. @@ -311,14 +306,10 @@ iscsi_tcp_segment_recv(struct iscsi_tcp_conn *tcp_conn, } inline void -iscsi_tcp_dgst_header(struct ahash_request *hash, const void *hdr, - size_t hdrlen, unsigned char digest[ISCSI_DIGEST_SIZE]) +iscsi_tcp_dgst_header(const void *hdr, size_t hdrlen, + unsigned char digest[ISCSI_DIGEST_SIZE]) { - struct scatterlist sg; - - sg_init_one(&sg, hdr, hdrlen); - ahash_request_set_crypt(hash, &sg, digest, hdrlen); - crypto_ahash_digest(hash); + put_unaligned_le32(~crc32c(~0, hdr, hdrlen), digest); } EXPORT_SYMBOL_GPL(iscsi_tcp_dgst_header); @@ -343,24 +334,23 @@ iscsi_tcp_dgst_verify(struct iscsi_tcp_conn *tcp_conn, */ static inline void __iscsi_segment_init(struct iscsi_segment *segment, size_t size, - iscsi_segment_done_fn_t *done, struct ahash_request *hash) + iscsi_segment_done_fn_t *done, u32 *crcp) { memset(segment, 0, sizeof(*segment)); segment->total_size = size; segment->done = done; - if (hash) { - segment->hash = hash; - crypto_ahash_init(hash); + if (crcp) { + segment->crcp = crcp; + *crcp = ~0; } } inline void iscsi_segment_init_linear(struct iscsi_segment *segment, void *data, - size_t size, iscsi_segment_done_fn_t *done, - struct ahash_request *hash) + size_t size, iscsi_segment_done_fn_t *done, u32 *crcp) { - __iscsi_segment_init(segment, size, done, hash); + __iscsi_segment_init(segment, size, done, crcp); segment->data = data; segment->size = size; } @@ -370,13 +360,12 @@ inline int iscsi_segment_seek_sg(struct iscsi_segment *segment, struct scatterlist *sg_list, unsigned int sg_count, unsigned int offset, size_t size, - iscsi_segment_done_fn_t *done, - struct ahash_request *hash) + iscsi_segment_done_fn_t *done, u32 *crcp) { struct scatterlist *sg; unsigned int i; - __iscsi_segment_init(segment, size, done, hash); + __iscsi_segment_init(segment, size, done, crcp); for_each_sg(sg_list, sg, sg_count, i) { if (offset < sg->length) { iscsi_tcp_segment_init_sg(segment, sg, offset); @@ -393,7 +382,7 @@ EXPORT_SYMBOL_GPL(iscsi_segment_seek_sg); * iscsi_tcp_hdr_recv_prep - prep segment for hdr reception * @tcp_conn: iscsi connection to prep for * - * This function always passes NULL for the hash argument, because when this + * This function always passes NULL for the crcp argument, because when this * function is called we do not yet know the final size of the header and want * to delay the digest processing until we know that. */ @@ -434,15 +423,15 @@ static void iscsi_tcp_data_recv_prep(struct iscsi_tcp_conn *tcp_conn) { struct iscsi_conn *conn = tcp_conn->iscsi_conn; - struct ahash_request *rx_hash = NULL; + u32 *rx_crcp = NULL; if (conn->datadgst_en && !(conn->session->tt->caps & CAP_DIGEST_OFFLOAD)) - rx_hash = tcp_conn->rx_hash; + rx_crcp = tcp_conn->rx_crcp; iscsi_segment_init_linear(&tcp_conn->in.segment, conn->data, tcp_conn->in.datalen, - iscsi_tcp_data_recv_done, rx_hash); + iscsi_tcp_data_recv_done, rx_crcp); } /** @@ -730,7 +719,7 @@ iscsi_tcp_hdr_dissect(struct iscsi_conn *conn, struct iscsi_hdr *hdr) if (tcp_conn->in.datalen) { struct iscsi_tcp_task *tcp_task = task->dd_data; - struct ahash_request *rx_hash = NULL; + u32 *rx_crcp = NULL; struct scsi_data_buffer *sdb = &task->sc->sdb; /* @@ -743,7 +732,7 @@ iscsi_tcp_hdr_dissect(struct iscsi_conn *conn, struct iscsi_hdr *hdr) */ if (conn->datadgst_en && !(conn->session->tt->caps & CAP_DIGEST_OFFLOAD)) - rx_hash = tcp_conn->rx_hash; + rx_crcp = tcp_conn->rx_crcp; ISCSI_DBG_TCP(conn, "iscsi_tcp_begin_data_in( " "offset=%d, datalen=%d)\n", @@ -756,7 +745,7 @@ iscsi_tcp_hdr_dissect(struct iscsi_conn *conn, struct iscsi_hdr *hdr) tcp_task->data_offset, tcp_conn->in.datalen, iscsi_tcp_process_data_in, - rx_hash); + rx_crcp); spin_unlock(&conn->session->back_lock); return rc; } @@ -878,7 +867,7 @@ iscsi_tcp_hdr_recv_done(struct iscsi_tcp_conn *tcp_conn, return 0; } - iscsi_tcp_dgst_header(tcp_conn->rx_hash, hdr, + iscsi_tcp_dgst_header(hdr, segment->total_copied - ISCSI_DIGEST_SIZE, segment->digest); diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 1d7db49a8fe4..9ab2e98cf693 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term * + * Copyright (C) 2017-2025 Broadcom. All Rights Reserved. The term * * “Broadcom†refers to Broadcom Inc. and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * @@ -9569,18 +9569,16 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport) mbx_tmo_err = test_bit(MBX_TMO_ERR, &phba->bit_flags); /* First we need to issue aborts to outstanding cmds on txcmpl */ list_for_each_entry_safe(piocb, tmp_iocb, &pring->txcmplq, list) { + if (piocb->vport != vport) + continue; + lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, "2243 iotag = 0x%x cmd_flag = 0x%x " - "ulp_command = 0x%x this_vport %x " - "sli_flag = 0x%x\n", + "ulp_command = 0x%x sli_flag = 0x%x\n", piocb->iotag, piocb->cmd_flag, get_job_cmnd(phba, piocb), - (piocb->vport == vport), phba->sli.sli_flag); - if (piocb->vport != vport) - continue; - if ((phba->sli.sli_flag & LPFC_SLI_ACTIVE) && !mbx_tmo_err) { if (piocb->cmd_flag & LPFC_IO_LIBDFC) continue; diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 36e66df36a18..a2fd74cf8603 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term * + * Copyright (C) 2017-2025 Broadcom. All Rights Reserved. The term * * “Broadcom†refers to Broadcom Inc. and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * @@ -228,10 +228,16 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport) if (ndlp->nlp_state == NLP_STE_MAPPED_NODE) return; - /* check for recovered fabric node */ - if (ndlp->nlp_state == NLP_STE_UNMAPPED_NODE && - ndlp->nlp_DID == Fabric_DID) + /* Ignore callback for a mismatched (stale) rport */ + if (ndlp->rport != rport) { + lpfc_vlog_msg(vport, KERN_WARNING, LOG_NODE, + "6788 fc rport mismatch: d_id x%06x ndlp x%px " + "fc rport x%px node rport x%px state x%x " + "refcnt %u\n", + ndlp->nlp_DID, ndlp, rport, ndlp->rport, + ndlp->nlp_state, kref_read(&ndlp->kref)); return; + } if (rport->port_name != wwn_to_u64(ndlp->nlp_portname.u.wwn)) lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, @@ -5564,6 +5570,7 @@ static struct lpfc_nodelist * __lpfc_findnode_did(struct lpfc_vport *vport, uint32_t did) { struct lpfc_nodelist *ndlp; + struct lpfc_nodelist *np = NULL; uint32_t data1; list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { @@ -5578,14 +5585,20 @@ __lpfc_findnode_did(struct lpfc_vport *vport, uint32_t did) ndlp, ndlp->nlp_DID, ndlp->nlp_flag, data1, ndlp->nlp_rpi, ndlp->active_rrqs_xri_bitmap); - return ndlp; + + /* Check for new or potentially stale node */ + if (ndlp->nlp_state != NLP_STE_UNUSED_NODE) + return ndlp; + np = ndlp; } } - /* FIND node did <did> NOT FOUND */ - lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE, - "0932 FIND node did x%x NOT FOUND.\n", did); - return NULL; + if (!np) + /* FIND node did <did> NOT FOUND */ + lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE, + "0932 FIND node did x%x NOT FOUND.\n", did); + + return np; } struct lpfc_nodelist * diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index bcadf11414c8..e360fc79b028 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term * + * Copyright (C) 2017-2025 Broadcom. All Rights Reserved. The term * * “Broadcom†refers to Broadcom Inc. and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * @@ -13170,6 +13170,7 @@ lpfc_sli4_enable_msi(struct lpfc_hba *phba) eqhdl = lpfc_get_eq_hdl(0); rc = pci_irq_vector(phba->pcidev, 0); if (rc < 0) { + free_irq(phba->pcidev->irq, phba); pci_free_irq_vectors(phba->pcidev); lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, "0496 MSI pci_irq_vec failed (%d)\n", rc); @@ -13250,6 +13251,7 @@ lpfc_sli4_enable_intr(struct lpfc_hba *phba, uint32_t cfg_mode) eqhdl = lpfc_get_eq_hdl(0); retval = pci_irq_vector(phba->pcidev, 0); if (retval < 0) { + free_irq(phba->pcidev->irq, phba); lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, "0502 INTR pci_irq_vec failed (%d)\n", retval); diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index c35f7225058e..638b50f35287 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -1,7 +1,7 @@ /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2017-2024 Broadcom. All Rights Reserved. The term * + * Copyright (C) 2017-2025 Broadcom. All Rights Reserved. The term * * “Broadcom†refers to Broadcom Inc. and/or its subsidiaries. * * Copyright (C) 2004-2016 Emulex. All rights reserved. * * EMULEX and SLI are trademarks of Emulex. * @@ -20,7 +20,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "14.4.0.7" +#define LPFC_DRIVER_VERSION "14.4.0.8" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ @@ -32,6 +32,6 @@ #define LPFC_MODULE_DESC "Emulex LightPulse Fibre Channel SCSI driver " \ LPFC_DRIVER_VERSION -#define LPFC_COPYRIGHT "Copyright (C) 2017-2024 Broadcom. All Rights " \ +#define LPFC_COPYRIGHT "Copyright (C) 2017-2025 Broadcom. All Rights " \ "Reserved. The term \"Broadcom\" refers to Broadcom Inc. " \ "and/or its subsidiaries." diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index adab151663dd..2006094af418 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -855,8 +855,8 @@ mega_build_cmd(adapter_t *adapter, struct scsi_cmnd *cmd, int *busy) return scb; #if MEGA_HAVE_CLUSTERING - case RESERVE: - case RELEASE: + case RESERVE_6: + case RELEASE_6: /* * Do we support clustering and is the support enabled @@ -875,7 +875,7 @@ mega_build_cmd(adapter_t *adapter, struct scsi_cmnd *cmd, int *busy) } scb->raw_mbox[0] = MEGA_CLUSTER_CMD; - scb->raw_mbox[2] = ( *cmd->cmnd == RESERVE ) ? + scb->raw_mbox[2] = *cmd->cmnd == RESERVE_6 ? MEGA_RESERVE_LD : MEGA_RELEASE_LD; scb->raw_mbox[3] = ldrv_num; @@ -1618,8 +1618,8 @@ mega_cmd_done(adapter_t *adapter, u8 completed[], int nstatus, int status) * failed or the input parameter is invalid */ if( status == 1 && - (cmd->cmnd[0] == RESERVE || - cmd->cmnd[0] == RELEASE) ) { + (cmd->cmnd[0] == RESERVE_6 || + cmd->cmnd[0] == RELEASE_6) ) { cmd->result |= (DID_ERROR << 16) | SAM_STAT_RESERVATION_CONFLICT; diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c index 60cc3372991f..3ba837b3093f 100644 --- a/drivers/scsi/megaraid/megaraid_mbox.c +++ b/drivers/scsi/megaraid/megaraid_mbox.c @@ -1725,8 +1725,8 @@ megaraid_mbox_build_cmd(adapter_t *adapter, struct scsi_cmnd *scp, int *busy) return scb; - case RESERVE: - case RELEASE: + case RESERVE_6: + case RELEASE_6: /* * Do we support clustering and is the support enabled */ @@ -1748,7 +1748,7 @@ megaraid_mbox_build_cmd(adapter_t *adapter, struct scsi_cmnd *scp, int *busy) scb->dev_channel = 0xFF; scb->dev_target = target; ccb->raw_mbox[0] = CLUSTER_CMD; - ccb->raw_mbox[2] = (scp->cmnd[0] == RESERVE) ? + ccb->raw_mbox[2] = scp->cmnd[0] == RESERVE_6 ? RESERVE_LD : RELEASE_LD; ccb->raw_mbox[3] = target; @@ -2334,8 +2334,8 @@ megaraid_mbox_dpc(unsigned long devp) * Error code returned is 1 if Reserve or Release * failed or the input parameter is invalid */ - if (status == 1 && (scp->cmnd[0] == RESERVE || - scp->cmnd[0] == RELEASE)) { + if (status == 1 && (scp->cmnd[0] == RESERVE_6 || + scp->cmnd[0] == RELEASE_6)) { scp->result = DID_ERROR << 16 | SAM_STAT_RESERVATION_CONFLICT; diff --git a/drivers/scsi/mpi3mr/mpi/mpi30_cnfg.h b/drivers/scsi/mpi3mr/mpi/mpi30_cnfg.h index 00cd18edfad6..96401eb7e231 100644 --- a/drivers/scsi/mpi3mr/mpi/mpi30_cnfg.h +++ b/drivers/scsi/mpi3mr/mpi/mpi30_cnfg.h @@ -19,6 +19,7 @@ #define MPI3_CONFIG_PAGETYPE_PCIE_SWITCH (0x31) #define MPI3_CONFIG_PAGETYPE_PCIE_LINK (0x33) #define MPI3_CONFIG_PAGEATTR_MASK (0xf0) +#define MPI3_CONFIG_PAGEATTR_SHIFT (4) #define MPI3_CONFIG_PAGEATTR_READ_ONLY (0x00) #define MPI3_CONFIG_PAGEATTR_CHANGEABLE (0x10) #define MPI3_CONFIG_PAGEATTR_PERSISTENT (0x20) @@ -29,10 +30,13 @@ #define MPI3_CONFIG_ACTION_READ_PERSISTENT (0x04) #define MPI3_CONFIG_ACTION_WRITE_PERSISTENT (0x05) #define MPI3_DEVICE_PGAD_FORM_MASK (0xf0000000) +#define MPI3_DEVICE_PGAD_FORM_SHIFT (28) #define MPI3_DEVICE_PGAD_FORM_GET_NEXT_HANDLE (0x00000000) #define MPI3_DEVICE_PGAD_FORM_HANDLE (0x20000000) #define MPI3_DEVICE_PGAD_HANDLE_MASK (0x0000ffff) +#define MPI3_DEVICE_PGAD_HANDLE_SHIFT (0) #define MPI3_SAS_EXPAND_PGAD_FORM_MASK (0xf0000000) +#define MPI3_SAS_EXPAND_PGAD_FORM_SHIFT (28) #define MPI3_SAS_EXPAND_PGAD_FORM_GET_NEXT_HANDLE (0x00000000) #define MPI3_SAS_EXPAND_PGAD_FORM_HANDLE_PHY_NUM (0x10000000) #define MPI3_SAS_EXPAND_PGAD_FORM_HANDLE (0x20000000) diff --git a/drivers/scsi/mpi3mr/mpi/mpi30_image.h b/drivers/scsi/mpi3mr/mpi/mpi30_image.h index 2c6e548cbd0f..8d824107a678 100644 --- a/drivers/scsi/mpi3mr/mpi/mpi30_image.h +++ b/drivers/scsi/mpi3mr/mpi/mpi30_image.h @@ -66,7 +66,12 @@ struct mpi3_component_image_header { #define MPI3_IMAGE_HEADER_SIGNATURE1_SMM (0x204d4d53) #define MPI3_IMAGE_HEADER_SIGNATURE1_PSW (0x20575350) #define MPI3_IMAGE_HEADER_SIGNATURE2_VALUE (0x50584546) +#define MPI3_IMAGE_HEADER_FLAGS_SIGNED_UEFI_MASK (0x00000300) +#define MPI3_IMAGE_HEADER_FLAGS_SIGNED_UEFI_SHIFT (8) +#define MPI3_IMAGE_HEADER_FLAGS_CERT_CHAIN_FORMAT_MASK (0x000000c0) +#define MPI3_IMAGE_HEADER_FLAGS_CERT_CHAIN_FORMAT_SHIFT (6) #define MPI3_IMAGE_HEADER_FLAGS_DEVICE_KEY_BASIS_MASK (0x00000030) +#define MPI3_IMAGE_HEADER_FLAGS_DEVICE_KEY_BASIS_SHIFT (4) #define MPI3_IMAGE_HEADER_FLAGS_DEVICE_KEY_BASIS_CDI (0x00000000) #define MPI3_IMAGE_HEADER_FLAGS_DEVICE_KEY_BASIS_DI (0x00000010) #define MPI3_IMAGE_HEADER_FLAGS_SIGNED_NVDATA (0x00000008) @@ -214,11 +219,13 @@ struct mpi3_encrypted_hash_entry { #define MPI3_HASH_IMAGE_TYPE_KEY_WITH_HASH_1_OF_2 (0x04) #define MPI3_HASH_IMAGE_TYPE_KEY_WITH_HASH_2_OF_2 (0x05) #define MPI3_HASH_ALGORITHM_VERSION_MASK (0xe0) +#define MPI3_HASH_ALGORITHM_VERSION_SHIFT (5) #define MPI3_HASH_ALGORITHM_VERSION_NONE (0x00) #define MPI3_HASH_ALGORITHM_VERSION_SHA1 (0x20) #define MPI3_HASH_ALGORITHM_VERSION_SHA2 (0x40) #define MPI3_HASH_ALGORITHM_VERSION_SHA3 (0x60) #define MPI3_HASH_ALGORITHM_SIZE_MASK (0x1f) +#define MPI3_HASH_ALGORITHM_SIZE_SHIFT (0) #define MPI3_HASH_ALGORITHM_SIZE_UNUSED (0x00) #define MPI3_HASH_ALGORITHM_SIZE_SHA256 (0x01) #define MPI3_HASH_ALGORITHM_SIZE_SHA512 (0x02) @@ -236,6 +243,7 @@ struct mpi3_encrypted_hash_entry { #define MPI3_ENCRYPTION_ALGORITHM_ML_DSA_65 (0x0c) #define MPI3_ENCRYPTION_ALGORITHM_ML_DSA_44 (0x0d) #define MPI3_ENCRYPTED_HASH_ENTRY_FLAGS_PAIRED_KEY_MASK (0x0f) +#define MPI3_ENCRYPTED_HASH_ENTRY_FLAGS_PAIRED_KEY_SHIFT (0) #ifndef MPI3_ENCRYPTED_HASH_ENTRY_MAX #define MPI3_ENCRYPTED_HASH_ENTRY_MAX (1) diff --git a/drivers/scsi/mpi3mr/mpi/mpi30_init.h b/drivers/scsi/mpi3mr/mpi/mpi30_init.h index af86d12c8e49..bbef5bac92ed 100644 --- a/drivers/scsi/mpi3mr/mpi/mpi30_init.h +++ b/drivers/scsi/mpi3mr/mpi/mpi30_init.h @@ -38,23 +38,31 @@ struct mpi3_scsi_io_request { #define MPI3_SCSIIO_MSGFLAGS_METASGL_VALID (0x80) #define MPI3_SCSIIO_MSGFLAGS_DIVERT_TO_FIRMWARE (0x40) #define MPI3_SCSIIO_FLAGS_LARGE_CDB (0x60000000) +#define MPI3_SCSIIO_FLAGS_LARGE_CDB_MASK (0x60000000) +#define MPI3_SCSIIO_FLAGS_LARGE_CDB_SHIFT (29) +#define MPI3_SCSIIO_FLAGS_IOC_USE_ONLY_27_MASK (0x18000000) +#define MPI3_SCSIIO_FLAGS_IOC_USE_ONLY_27_SHIFT (27) #define MPI3_SCSIIO_FLAGS_CDB_16_OR_LESS (0x00000000) #define MPI3_SCSIIO_FLAGS_CDB_GREATER_THAN_16 (0x20000000) #define MPI3_SCSIIO_FLAGS_CDB_IN_SEPARATE_BUFFER (0x40000000) #define MPI3_SCSIIO_FLAGS_TASKATTRIBUTE_MASK (0x07000000) +#define MPI3_SCSIIO_FLAGS_TASKATTRIBUTE_SHIFT (24) +#define MPI3_SCSIIO_FLAGS_DATADIRECTION_MASK (0x000c0000) +#define MPI3_SCSIIO_FLAGS_DATADIRECTION_SHIFT (18) #define MPI3_SCSIIO_FLAGS_TASKATTRIBUTE_SIMPLEQ (0x00000000) #define MPI3_SCSIIO_FLAGS_TASKATTRIBUTE_HEADOFQ (0x01000000) #define MPI3_SCSIIO_FLAGS_TASKATTRIBUTE_ORDEREDQ (0x02000000) #define MPI3_SCSIIO_FLAGS_TASKATTRIBUTE_ACAQ (0x04000000) #define MPI3_SCSIIO_FLAGS_CMDPRI_MASK (0x00f00000) #define MPI3_SCSIIO_FLAGS_CMDPRI_SHIFT (20) -#define MPI3_SCSIIO_FLAGS_DATADIRECTION_MASK (0x000c0000) #define MPI3_SCSIIO_FLAGS_DATADIRECTION_NO_DATA_TRANSFER (0x00000000) #define MPI3_SCSIIO_FLAGS_DATADIRECTION_WRITE (0x00040000) #define MPI3_SCSIIO_FLAGS_DATADIRECTION_READ (0x00080000) #define MPI3_SCSIIO_FLAGS_DMAOPERATION_MASK (0x00030000) +#define MPI3_SCSIIO_FLAGS_DMAOPERATION_SHIFT (16) #define MPI3_SCSIIO_FLAGS_DMAOPERATION_HOST_PI (0x00010000) #define MPI3_SCSIIO_FLAGS_DIVERT_REASON_MASK (0x000000f0) +#define MPI3_SCSIIO_FLAGS_DIVERT_REASON_SHIFT (4) #define MPI3_SCSIIO_FLAGS_DIVERT_REASON_IO_THROTTLING (0x00000010) #define MPI3_SCSIIO_FLAGS_DIVERT_REASON_WRITE_SAME_TOO_LARGE (0x00000020) #define MPI3_SCSIIO_FLAGS_DIVERT_REASON_PROD_SPECIFIC (0x00000080) @@ -99,6 +107,7 @@ struct mpi3_scsi_io_reply { #define MPI3_SCSI_STATUS_ACA_ACTIVE (0x30) #define MPI3_SCSI_STATUS_TASK_ABORTED (0x40) #define MPI3_SCSI_STATE_SENSE_MASK (0x03) +#define MPI3_SCSI_STATE_SENSE_SHIFT (0) #define MPI3_SCSI_STATE_SENSE_VALID (0x00) #define MPI3_SCSI_STATE_SENSE_FAILED (0x01) #define MPI3_SCSI_STATE_SENSE_BUFF_Q_EMPTY (0x02) diff --git a/drivers/scsi/mpi3mr/mpi/mpi30_ioc.h b/drivers/scsi/mpi3mr/mpi/mpi30_ioc.h index c374867f9ba0..b42933fcd423 100644 --- a/drivers/scsi/mpi3mr/mpi/mpi30_ioc.h +++ b/drivers/scsi/mpi3mr/mpi/mpi30_ioc.h @@ -30,6 +30,7 @@ struct mpi3_ioc_init_request { #define MPI3_IOCINIT_MSGFLAGS_WRITESAMEDIVERT_SUPPORTED (0x08) #define MPI3_IOCINIT_MSGFLAGS_SCSIIOSTATUSREPLY_SUPPORTED (0x04) #define MPI3_IOCINIT_MSGFLAGS_HOSTMETADATA_MASK (0x03) +#define MPI3_IOCINIT_MSGFLAGS_HOSTMETADATA_SHIFT (0) #define MPI3_IOCINIT_MSGFLAGS_HOSTMETADATA_NOT_USED (0x00) #define MPI3_IOCINIT_MSGFLAGS_HOSTMETADATA_SEPARATED (0x01) #define MPI3_IOCINIT_MSGFLAGS_HOSTMETADATA_INLINE (0x02) @@ -40,6 +41,7 @@ struct mpi3_ioc_init_request { #define MPI3_WHOINIT_MANUFACTURER (0x04) #define MPI3_IOCINIT_DRIVERCAP_OSEXPOSURE_MASK (0x00000003) +#define MPI3_IOCINIT_DRIVERCAP_OSEXPOSURE_SHIFT (0) #define MPI3_IOCINIT_DRIVERCAP_OSEXPOSURE_NO_GUIDANCE (0x00000000) #define MPI3_IOCINIT_DRIVERCAP_OSEXPOSURE_NO_SPECIAL (0x00000001) #define MPI3_IOCINIT_DRIVERCAP_OSEXPOSURE_REPORT_AS_HDD (0x00000002) @@ -111,9 +113,11 @@ struct mpi3_ioc_facts_data { __le32 diag_tty_size; }; #define MPI3_IOCFACTS_CAPABILITY_NON_SUPERVISOR_MASK (0x80000000) +#define MPI3_IOCFACTS_CAPABILITY_NON_SUPERVISOR_SHIFT (31) #define MPI3_IOCFACTS_CAPABILITY_SUPERVISOR_IOC (0x00000000) #define MPI3_IOCFACTS_CAPABILITY_NON_SUPERVISOR_IOC (0x80000000) #define MPI3_IOCFACTS_CAPABILITY_INT_COALESCE_MASK (0x00000600) +#define MPI3_IOCFACTS_CAPABILITY_INT_COALESCE_SHIFT (9) #define MPI3_IOCFACTS_CAPABILITY_INT_COALESCE_FIXED_THRESHOLD (0x00000000) #define MPI3_IOCFACTS_CAPABILITY_INT_COALESCE_OUTSTANDING_IO (0x00000200) #define MPI3_IOCFACTS_CAPABILITY_COMPLETE_RESET_SUPPORTED (0x00000100) @@ -134,6 +138,7 @@ struct mpi3_ioc_facts_data { #define MPI3_IOCFACTS_EXCEPT_SAS_DISABLED (0x1000) #define MPI3_IOCFACTS_EXCEPT_SAFE_MODE (0x0800) #define MPI3_IOCFACTS_EXCEPT_SECURITY_KEY_MASK (0x0700) +#define MPI3_IOCFACTS_EXCEPT_SECURITY_KEY_SHIFT (8) #define MPI3_IOCFACTS_EXCEPT_SECURITY_KEY_NONE (0x0000) #define MPI3_IOCFACTS_EXCEPT_SECURITY_KEY_LOCAL_VIA_MGMT (0x0100) #define MPI3_IOCFACTS_EXCEPT_SECURITY_KEY_EXT_VIA_MGMT (0x0200) @@ -149,6 +154,7 @@ struct mpi3_ioc_facts_data { #define MPI3_IOCFACTS_EXCEPT_BLOCKING_BOOT_EVENT (0x0004) #define MPI3_IOCFACTS_EXCEPT_SECURITY_SELFTEST_FAILURE (0x0002) #define MPI3_IOCFACTS_EXCEPT_BOOTSTAT_MASK (0x0001) +#define MPI3_IOCFACTS_EXCEPT_BOOTSTAT_SHIFT (0) #define MPI3_IOCFACTS_EXCEPT_BOOTSTAT_PRIMARY (0x0000) #define MPI3_IOCFACTS_EXCEPT_BOOTSTAT_SECONDARY (0x0001) #define MPI3_IOCFACTS_PROTOCOL_SAS (0x0010) @@ -161,10 +167,12 @@ struct mpi3_ioc_facts_data { #define MPI3_IOCFACTS_FLAGS_DMA_ADDRESS_WIDTH_MASK (0x0000ff00) #define MPI3_IOCFACTS_FLAGS_DMA_ADDRESS_WIDTH_SHIFT (8) #define MPI3_IOCFACTS_FLAGS_INITIAL_PORT_ENABLE_MASK (0x00000030) +#define MPI3_IOCFACTS_FLAGS_INITIAL_PORT_ENABLE_SHIFT (4) #define MPI3_IOCFACTS_FLAGS_INITIAL_PORT_ENABLE_NOT_STARTED (0x00000000) #define MPI3_IOCFACTS_FLAGS_INITIAL_PORT_ENABLE_IN_PROGRESS (0x00000010) #define MPI3_IOCFACTS_FLAGS_INITIAL_PORT_ENABLE_COMPLETE (0x00000020) #define MPI3_IOCFACTS_FLAGS_PERSONALITY_MASK (0x0000000f) +#define MPI3_IOCFACTS_FLAGS_PERSONALITY_SHIFT (0) #define MPI3_IOCFACTS_FLAGS_PERSONALITY_EHBA (0x00000000) #define MPI3_IOCFACTS_FLAGS_PERSONALITY_RAID_DDR (0x00000002) #define MPI3_IOCFACTS_IO_THROTTLE_DATA_LENGTH_NOT_REQUIRED (0x0000) @@ -204,6 +212,7 @@ struct mpi3_create_request_queue_request { }; #define MPI3_CREATE_REQUEST_QUEUE_FLAGS_SEGMENTED_MASK (0x80) +#define MPI3_CREATE_REQUEST_QUEUE_FLAGS_SEGMENTED_SHIFT (7) #define MPI3_CREATE_REQUEST_QUEUE_FLAGS_SEGMENTED_SEGMENTED (0x80) #define MPI3_CREATE_REQUEST_QUEUE_FLAGS_SEGMENTED_CONTIGUOUS (0x00) #define MPI3_CREATE_REQUEST_QUEUE_SIZE_MINIMUM (2) @@ -237,10 +246,12 @@ struct mpi3_create_reply_queue_request { }; #define MPI3_CREATE_REPLY_QUEUE_FLAGS_SEGMENTED_MASK (0x80) +#define MPI3_CREATE_REPLY_QUEUE_FLAGS_SEGMENTED_SHIFT (7) #define MPI3_CREATE_REPLY_QUEUE_FLAGS_SEGMENTED_SEGMENTED (0x80) #define MPI3_CREATE_REPLY_QUEUE_FLAGS_SEGMENTED_CONTIGUOUS (0x00) #define MPI3_CREATE_REPLY_QUEUE_FLAGS_COALESCE_DISABLE (0x02) #define MPI3_CREATE_REPLY_QUEUE_FLAGS_INT_ENABLE_MASK (0x01) +#define MPI3_CREATE_REPLY_QUEUE_FLAGS_INT_ENABLE_SHIFT (0) #define MPI3_CREATE_REPLY_QUEUE_FLAGS_INT_ENABLE_DISABLE (0x00) #define MPI3_CREATE_REPLY_QUEUE_FLAGS_INT_ENABLE_ENABLE (0x01) #define MPI3_CREATE_REPLY_QUEUE_SIZE_MINIMUM (2) @@ -326,9 +337,11 @@ struct mpi3_event_notification_reply { }; #define MPI3_EVENT_NOTIFY_MSGFLAGS_ACK_MASK (0x01) +#define MPI3_EVENT_NOTIFY_MSGFLAGS_ACK_SHIFT (0) #define MPI3_EVENT_NOTIFY_MSGFLAGS_ACK_REQUIRED (0x01) #define MPI3_EVENT_NOTIFY_MSGFLAGS_ACK_NOT_REQUIRED (0x00) #define MPI3_EVENT_NOTIFY_MSGFLAGS_EVENT_ORIGINALITY_MASK (0x02) +#define MPI3_EVENT_NOTIFY_MSGFLAGS_EVENT_ORIGINALITY_SHIFT (1) #define MPI3_EVENT_NOTIFY_MSGFLAGS_EVENT_ORIGINALITY_ORIGINAL (0x00) #define MPI3_EVENT_NOTIFY_MSGFLAGS_EVENT_ORIGINALITY_REPLAY (0x02) struct mpi3_event_data_gpio_interrupt { @@ -487,6 +500,7 @@ struct mpi3_event_sas_topo_phy_entry { #define MPI3_EVENT_SAS_TOPO_PHY_STATUS_NO_EXIST (0x40) #define MPI3_EVENT_SAS_TOPO_PHY_STATUS_VACANT (0x80) #define MPI3_EVENT_SAS_TOPO_PHY_RC_MASK (0x0f) +#define MPI3_EVENT_SAS_TOPO_PHY_RC_SHIFT (0) #define MPI3_EVENT_SAS_TOPO_PHY_RC_TARG_NOT_RESPONDING (0x02) #define MPI3_EVENT_SAS_TOPO_PHY_RC_PHY_CHANGED (0x03) #define MPI3_EVENT_SAS_TOPO_PHY_RC_NO_CHANGE (0x04) @@ -566,6 +580,7 @@ struct mpi3_event_pcie_topo_port_entry { #define MPI3_EVENT_PCIE_TOPO_PS_DELAY_NOT_RESPONDING (0x05) #define MPI3_EVENT_PCIE_TOPO_PS_RESPONDING (0x06) #define MPI3_EVENT_PCIE_TOPO_PI_LANES_MASK (0xf0) +#define MPI3_EVENT_PCIE_TOPO_PI_LANES_SHIFT (4) #define MPI3_EVENT_PCIE_TOPO_PI_LANES_UNKNOWN (0x00) #define MPI3_EVENT_PCIE_TOPO_PI_LANES_1 (0x10) #define MPI3_EVENT_PCIE_TOPO_PI_LANES_2 (0x20) @@ -573,6 +588,7 @@ struct mpi3_event_pcie_topo_port_entry { #define MPI3_EVENT_PCIE_TOPO_PI_LANES_8 (0x40) #define MPI3_EVENT_PCIE_TOPO_PI_LANES_16 (0x50) #define MPI3_EVENT_PCIE_TOPO_PI_RATE_MASK (0x0f) +#define MPI3_EVENT_PCIE_TOPO_PI_RATE_SHIFT (0) #define MPI3_EVENT_PCIE_TOPO_PI_RATE_UNKNOWN (0x00) #define MPI3_EVENT_PCIE_TOPO_PI_RATE_DISABLED (0x01) #define MPI3_EVENT_PCIE_TOPO_PI_RATE_2_5 (0x02) @@ -881,6 +897,7 @@ struct mpi3_pel_req_action_acknowledge { }; #define MPI3_PELACKNOWLEDGE_MSGFLAGS_SAFE_MODE_EXIT_MASK (0x03) +#define MPI3_PELACKNOWLEDGE_MSGFLAGS_SAFE_MODE_EXIT_SHIFT (0) #define MPI3_PELACKNOWLEDGE_MSGFLAGS_SAFE_MODE_EXIT_NO_GUIDANCE (0x00) #define MPI3_PELACKNOWLEDGE_MSGFLAGS_SAFE_MODE_EXIT_CONTINUE_OP (0x01) #define MPI3_PELACKNOWLEDGE_MSGFLAGS_SAFE_MODE_EXIT_TRANSITION_TO_FAULT (0x02) @@ -924,6 +941,7 @@ struct mpi3_ci_download_request { #define MPI3_CI_DOWNLOAD_MSGFLAGS_FORCE_FMC_ENABLE (0x40) #define MPI3_CI_DOWNLOAD_MSGFLAGS_SIGNED_NVDATA (0x20) #define MPI3_CI_DOWNLOAD_MSGFLAGS_WRITE_CACHE_FLUSH_MASK (0x03) +#define MPI3_CI_DOWNLOAD_MSGFLAGS_WRITE_CACHE_FLUSH_SHIFT (0) #define MPI3_CI_DOWNLOAD_MSGFLAGS_WRITE_CACHE_FLUSH_FAST (0x00) #define MPI3_CI_DOWNLOAD_MSGFLAGS_WRITE_CACHE_FLUSH_MEDIUM (0x01) #define MPI3_CI_DOWNLOAD_MSGFLAGS_WRITE_CACHE_FLUSH_SLOW (0x02) @@ -953,6 +971,7 @@ struct mpi3_ci_download_reply { #define MPI3_CI_DOWNLOAD_FLAGS_OFFLINE_ACTIVATION_REQUIRED (0x20) #define MPI3_CI_DOWNLOAD_FLAGS_KEY_UPDATE_PENDING (0x10) #define MPI3_CI_DOWNLOAD_FLAGS_ACTIVATION_STATUS_MASK (0x0e) +#define MPI3_CI_DOWNLOAD_FLAGS_ACTIVATION_STATUS_SHIFT (1) #define MPI3_CI_DOWNLOAD_FLAGS_ACTIVATION_STATUS_NOT_NEEDED (0x00) #define MPI3_CI_DOWNLOAD_FLAGS_ACTIVATION_STATUS_AWAITING (0x02) #define MPI3_CI_DOWNLOAD_FLAGS_ACTIVATION_STATUS_ONLINE_PENDING (0x04) @@ -976,9 +995,11 @@ struct mpi3_ci_upload_request { }; #define MPI3_CI_UPLOAD_MSGFLAGS_LOCATION_MASK (0x01) +#define MPI3_CI_UPLOAD_MSGFLAGS_LOCATION_SHIFT (0) #define MPI3_CI_UPLOAD_MSGFLAGS_LOCATION_PRIMARY (0x00) #define MPI3_CI_UPLOAD_MSGFLAGS_LOCATION_SECONDARY (0x01) #define MPI3_CI_UPLOAD_MSGFLAGS_FORMAT_MASK (0x02) +#define MPI3_CI_UPLOAD_MSGFLAGS_FORMAT_SHIFT (1) #define MPI3_CI_UPLOAD_MSGFLAGS_FORMAT_FLASH (0x00) #define MPI3_CI_UPLOAD_MSGFLAGS_FORMAT_EXECUTABLE (0x02) #define MPI3_CTRL_OP_FORCE_FULL_DISCOVERY (0x01) diff --git a/drivers/scsi/mpi3mr/mpi/mpi30_tool.h b/drivers/scsi/mpi3mr/mpi/mpi30_tool.h index 3b960893870f..50a65b16a818 100644 --- a/drivers/scsi/mpi3mr/mpi/mpi30_tool.h +++ b/drivers/scsi/mpi3mr/mpi/mpi30_tool.h @@ -9,6 +9,7 @@ #define MPI3_DIAG_BUFFER_TYPE_FW (0x02) #define MPI3_DIAG_BUFFER_ACTION_RELEASE (0x01) +#define MPI3_DIAG_BUFFER_POST_MSGFLAGS_SEGMENTED (0x01) struct mpi3_diag_buffer_post_request { __le16 host_tag; u8 ioc_use_only02; diff --git a/drivers/scsi/mpi3mr/mpi/mpi30_transport.h b/drivers/scsi/mpi3mr/mpi/mpi30_transport.h index b2ab25a1cfeb..5c522e2531c3 100644 --- a/drivers/scsi/mpi3mr/mpi/mpi30_transport.h +++ b/drivers/scsi/mpi3mr/mpi/mpi30_transport.h @@ -18,7 +18,7 @@ union mpi3_version_union { #define MPI3_VERSION_MAJOR (3) #define MPI3_VERSION_MINOR (0) -#define MPI3_VERSION_UNIT (34) +#define MPI3_VERSION_UNIT (35) #define MPI3_VERSION_DEV (0) #define MPI3_DEVHANDLE_INVALID (0xffff) struct mpi3_sysif_oper_queue_indexes { @@ -80,6 +80,7 @@ struct mpi3_sysif_registers { #define MPI3_SYSIF_IOC_CONFIG_OPER_RPY_ENT_SZ_SHIFT (20) #define MPI3_SYSIF_IOC_CONFIG_OPER_REQ_ENT_SZ (0x000f0000) #define MPI3_SYSIF_IOC_CONFIG_OPER_REQ_ENT_SZ_SHIFT (16) +#define MPI3_SYSIF_IOC_CONFIG_SHUTDOWN_SHIFT (14) #define MPI3_SYSIF_IOC_CONFIG_SHUTDOWN_MASK (0x0000c000) #define MPI3_SYSIF_IOC_CONFIG_SHUTDOWN_NO (0x00000000) #define MPI3_SYSIF_IOC_CONFIG_SHUTDOWN_NORMAL (0x00004000) @@ -97,6 +98,7 @@ struct mpi3_sysif_registers { #define MPI3_SYSIF_IOC_STATUS_READY (0x00000001) #define MPI3_SYSIF_ADMIN_Q_NUM_ENTRIES_OFFSET (0x00000024) #define MPI3_SYSIF_ADMIN_Q_NUM_ENTRIES_REQ_MASK (0x0fff) +#define MPI3_SYSIF_ADMIN_Q_NUM_ENTRIES_REQ_SHIFT (0) #define MPI3_SYSIF_ADMIN_Q_NUM_ENTRIES_REPLY_OFFSET (0x00000026) #define MPI3_SYSIF_ADMIN_Q_NUM_ENTRIES_REPLY_MASK (0x0fff0000) #define MPI3_SYSIF_ADMIN_Q_NUM_ENTRIES_REPLY_SHIFT (16) @@ -106,6 +108,7 @@ struct mpi3_sysif_registers { #define MPI3_SYSIF_ADMIN_REPLY_Q_ADDR_HIGH_OFFSET (0x00000034) #define MPI3_SYSIF_COALESCE_CONTROL_OFFSET (0x00000040) #define MPI3_SYSIF_COALESCE_CONTROL_ENABLE_MASK (0xc0000000) +#define MPI3_SYSIF_COALESCE_CONTROL_ENABLE_SHIFT (30) #define MPI3_SYSIF_COALESCE_CONTROL_ENABLE_NO_CHANGE (0x00000000) #define MPI3_SYSIF_COALESCE_CONTROL_ENABLE_DISABLE (0x40000000) #define MPI3_SYSIF_COALESCE_CONTROL_ENABLE_ENABLE (0xc0000000) @@ -124,6 +127,7 @@ struct mpi3_sysif_registers { #define MPI3_SYSIF_OPER_REPLY_Q_N_CI_OFFSET(N) (MPI3_SYSIF_OPER_REPLY_Q_CI_OFFSET + (((N) - 1) * 8)) #define MPI3_SYSIF_WRITE_SEQUENCE_OFFSET (0x00001c04) #define MPI3_SYSIF_WRITE_SEQUENCE_KEY_VALUE_MASK (0x0000000f) +#define MPI3_SYSIF_WRITE_SEQUENCE_KEY_VALUE_SHIFT (0) #define MPI3_SYSIF_WRITE_SEQUENCE_KEY_VALUE_FLUSH (0x0) #define MPI3_SYSIF_WRITE_SEQUENCE_KEY_VALUE_1ST (0xf) #define MPI3_SYSIF_WRITE_SEQUENCE_KEY_VALUE_2ND (0x4) @@ -133,6 +137,7 @@ struct mpi3_sysif_registers { #define MPI3_SYSIF_WRITE_SEQUENCE_KEY_VALUE_6TH (0xd) #define MPI3_SYSIF_HOST_DIAG_OFFSET (0x00001c08) #define MPI3_SYSIF_HOST_DIAG_RESET_ACTION_MASK (0x00000700) +#define MPI3_SYSIF_HOST_DIAG_RESET_ACTION_SHIFT (8) #define MPI3_SYSIF_HOST_DIAG_RESET_ACTION_NO_RESET (0x00000000) #define MPI3_SYSIF_HOST_DIAG_RESET_ACTION_SOFT_RESET (0x00000100) #define MPI3_SYSIF_HOST_DIAG_RESET_ACTION_HOST_CONTROL_BOOT_RESET (0x00000200) @@ -151,6 +156,7 @@ struct mpi3_sysif_registers { #define MPI3_SYSIF_FAULT_FUNC_AREA_SHIFT (24) #define MPI3_SYSIF_FAULT_FUNC_AREA_MPI_DEFINED (0x00000000) #define MPI3_SYSIF_FAULT_CODE_MASK (0x0000ffff) +#define MPI3_SYSIF_FAULT_CODE_SHIFT (0) #define MPI3_SYSIF_FAULT_CODE_DIAG_FAULT_RESET (0x0000f000) #define MPI3_SYSIF_FAULT_CODE_CI_ACTIVATION_RESET (0x0000f001) #define MPI3_SYSIF_FAULT_CODE_SOFT_RESET_IN_PROGRESS (0x0000f002) @@ -176,17 +182,20 @@ struct mpi3_sysif_registers { #define MPI3_SYSIF_DIAG_RW_ADDRESS_HIGH_OFFSET (0x00001c5c) #define MPI3_SYSIF_DIAG_RW_CONTROL_OFFSET (0x00001c60) #define MPI3_SYSIF_DIAG_RW_CONTROL_LEN_MASK (0x00000030) +#define MPI3_SYSIF_DIAG_RW_CONTROL_LEN_SHIFT (4) #define MPI3_SYSIF_DIAG_RW_CONTROL_LEN_1BYTE (0x00000000) #define MPI3_SYSIF_DIAG_RW_CONTROL_LEN_2BYTES (0x00000010) #define MPI3_SYSIF_DIAG_RW_CONTROL_LEN_4BYTES (0x00000020) #define MPI3_SYSIF_DIAG_RW_CONTROL_LEN_8BYTES (0x00000030) #define MPI3_SYSIF_DIAG_RW_CONTROL_RESET (0x00000004) #define MPI3_SYSIF_DIAG_RW_CONTROL_DIR_MASK (0x00000002) +#define MPI3_SYSIF_DIAG_RW_CONTROL_DIR_SHIFT (1) #define MPI3_SYSIF_DIAG_RW_CONTROL_DIR_READ (0x00000000) #define MPI3_SYSIF_DIAG_RW_CONTROL_DIR_WRITE (0x00000002) #define MPI3_SYSIF_DIAG_RW_CONTROL_START (0x00000001) #define MPI3_SYSIF_DIAG_RW_STATUS_OFFSET (0x00001c62) #define MPI3_SYSIF_DIAG_RW_STATUS_STATUS_MASK (0x0000000e) +#define MPI3_SYSIF_DIAG_RW_STATUS_STATUS_SHIFT (1) #define MPI3_SYSIF_DIAG_RW_STATUS_STATUS_SUCCESS (0x00000000) #define MPI3_SYSIF_DIAG_RW_STATUS_STATUS_INV_ADDR (0x00000002) #define MPI3_SYSIF_DIAG_RW_STATUS_STATUS_ACC_ERR (0x00000004) @@ -207,7 +216,9 @@ struct mpi3_default_reply_descriptor { }; #define MPI3_REPLY_DESCRIPT_FLAGS_PHASE_MASK (0x0001) +#define MPI3_REPLY_DESCRIPT_FLAGS_PHASE_SHIFT (0) #define MPI3_REPLY_DESCRIPT_FLAGS_TYPE_MASK (0xf000) +#define MPI3_REPLY_DESCRIPT_FLAGS_TYPE_SHIFT (12) #define MPI3_REPLY_DESCRIPT_FLAGS_TYPE_ADDRESS_REPLY (0x0000) #define MPI3_REPLY_DESCRIPT_FLAGS_TYPE_SUCCESS (0x1000) #define MPI3_REPLY_DESCRIPT_FLAGS_TYPE_TARGET_COMMAND_BUFFER (0x2000) @@ -301,6 +312,7 @@ union mpi3_sge_union { }; #define MPI3_SGE_FLAGS_ELEMENT_TYPE_MASK (0xf0) +#define MPI3_SGE_FLAGS_ELEMENT_TYPE_SHIFT (4) #define MPI3_SGE_FLAGS_ELEMENT_TYPE_SIMPLE (0x00) #define MPI3_SGE_FLAGS_ELEMENT_TYPE_BIT_BUCKET (0x10) #define MPI3_SGE_FLAGS_ELEMENT_TYPE_CHAIN (0x20) @@ -309,6 +321,7 @@ union mpi3_sge_union { #define MPI3_SGE_FLAGS_END_OF_LIST (0x08) #define MPI3_SGE_FLAGS_END_OF_BUFFER (0x04) #define MPI3_SGE_FLAGS_DLAS_MASK (0x03) +#define MPI3_SGE_FLAGS_DLAS_SHIFT (0) #define MPI3_SGE_FLAGS_DLAS_SYSTEM (0x00) #define MPI3_SGE_FLAGS_DLAS_IOC_UDP (0x01) #define MPI3_SGE_FLAGS_DLAS_IOC_CTL (0x02) @@ -322,15 +335,18 @@ union mpi3_sge_union { #define MPI3_EEDPFLAGS_CHK_APP_TAG (0x0200) #define MPI3_EEDPFLAGS_CHK_GUARD (0x0100) #define MPI3_EEDPFLAGS_ESC_MODE_MASK (0x00c0) +#define MPI3_EEDPFLAGS_ESC_MODE_SHIFT (6) #define MPI3_EEDPFLAGS_ESC_MODE_DO_NOT_DISABLE (0x0040) #define MPI3_EEDPFLAGS_ESC_MODE_APPTAG_DISABLE (0x0080) #define MPI3_EEDPFLAGS_ESC_MODE_APPTAG_REFTAG_DISABLE (0x00c0) #define MPI3_EEDPFLAGS_HOST_GUARD_MASK (0x0030) +#define MPI3_EEDPFLAGS_HOST_GUARD_SHIFT (4) #define MPI3_EEDPFLAGS_HOST_GUARD_T10_CRC (0x0000) #define MPI3_EEDPFLAGS_HOST_GUARD_IP_CHKSUM (0x0010) #define MPI3_EEDPFLAGS_HOST_GUARD_OEM_SPECIFIC (0x0020) #define MPI3_EEDPFLAGS_PT_REF_TAG (0x0008) #define MPI3_EEDPFLAGS_EEDP_OP_MASK (0x0007) +#define MPI3_EEDPFLAGS_EEDP_OP_SHIFT (0) #define MPI3_EEDPFLAGS_EEDP_OP_CHECK (0x0001) #define MPI3_EEDPFLAGS_EEDP_OP_STRIP (0x0002) #define MPI3_EEDPFLAGS_EEDP_OP_CHECK_REMOVE (0x0003) @@ -403,6 +419,7 @@ struct mpi3_default_reply { #define MPI3_IOCSTATUS_LOG_INFO_AVAIL_MASK (0x8000) #define MPI3_IOCSTATUS_LOG_INFO_AVAILABLE (0x8000) #define MPI3_IOCSTATUS_STATUS_MASK (0x7fff) +#define MPI3_IOCSTATUS_STATUS_SHIFT (0) #define MPI3_IOCSTATUS_SUCCESS (0x0000) #define MPI3_IOCSTATUS_INVALID_FUNCTION (0x0001) #define MPI3_IOCSTATUS_BUSY (0x0002) @@ -469,4 +486,5 @@ struct mpi3_default_reply { #define MPI3_IOCLOGINFO_TYPE_NONE (0x0) #define MPI3_IOCLOGINFO_TYPE_SAS (0x3) #define MPI3_IOCLOGINFO_LOG_DATA_MASK (0x0fffffff) +#define MPI3_IOCLOGINFO_LOG_DATA_SHIFT (0) #endif diff --git a/drivers/scsi/mpi3mr/mpi3mr.h b/drivers/scsi/mpi3mr/mpi3mr.h index 0d72b5f1b69d..9bbc7cb98ca3 100644 --- a/drivers/scsi/mpi3mr/mpi3mr.h +++ b/drivers/scsi/mpi3mr/mpi3mr.h @@ -56,8 +56,8 @@ extern struct list_head mrioc_list; extern int prot_mask; extern atomic64_t event_counter; -#define MPI3MR_DRIVER_VERSION "8.12.0.3.50" -#define MPI3MR_DRIVER_RELDATE "11-November-2024" +#define MPI3MR_DRIVER_VERSION "8.13.0.5.50" +#define MPI3MR_DRIVER_RELDATE "20-February-2025" #define MPI3MR_DRIVER_NAME "mpi3mr" #define MPI3MR_DRIVER_LICENSE "GPL" @@ -80,13 +80,14 @@ extern atomic64_t event_counter; /* Admin queue management definitions */ #define MPI3MR_ADMIN_REQ_Q_SIZE (2 * MPI3MR_PAGE_SIZE_4K) -#define MPI3MR_ADMIN_REPLY_Q_SIZE (4 * MPI3MR_PAGE_SIZE_4K) +#define MPI3MR_ADMIN_REPLY_Q_SIZE (8 * MPI3MR_PAGE_SIZE_4K) #define MPI3MR_ADMIN_REQ_FRAME_SZ 128 #define MPI3MR_ADMIN_REPLY_FRAME_SZ 16 /* Operational queue management definitions */ #define MPI3MR_OP_REQ_Q_QD 512 #define MPI3MR_OP_REP_Q_QD 1024 +#define MPI3MR_OP_REP_Q_QD2K 2048 #define MPI3MR_OP_REP_Q_QD4K 4096 #define MPI3MR_OP_REQ_Q_SEG_SIZE 4096 #define MPI3MR_OP_REP_Q_SEG_SIZE 4096 @@ -328,6 +329,7 @@ enum mpi3mr_reset_reason { #define MPI3MR_RESET_REASON_OSTYPE_SHIFT 28 #define MPI3MR_RESET_REASON_IOCNUM_SHIFT 20 + /* Queue type definitions */ enum queue_type { MPI3MR_DEFAULT_QUEUE = 0, @@ -387,6 +389,7 @@ struct mpi3mr_ioc_facts { u16 max_msix_vectors; u8 personality; u8 dma_mask; + bool max_req_limit; u8 protocol_flags; u8 sge_mod_mask; u8 sge_mod_value; @@ -456,6 +459,8 @@ struct op_req_qinfo { * @enable_irq_poll: Flag to indicate polling is enabled * @in_use: Queue is handled by poll/ISR * @qtype: Type of queue (types defined in enum queue_type) + * @qfull_watermark: Watermark defined in reply queue to avoid + * reply queue full */ struct op_reply_qinfo { u16 ci; @@ -471,6 +476,7 @@ struct op_reply_qinfo { bool enable_irq_poll; atomic_t in_use; enum queue_type qtype; + u16 qfull_watermark; }; /** @@ -928,6 +934,8 @@ struct trigger_event_data { * @size: Buffer size * @addr: Virtual address * @dma_addr: Buffer DMA address + * @is_segmented: The buffer is segmented or not + * @disabled_after_reset: The buffer is disabled after reset */ struct diag_buffer_desc { u8 type; @@ -937,6 +945,8 @@ struct diag_buffer_desc { u32 size; void *addr; dma_addr_t dma_addr; + bool is_segmented; + bool disabled_after_reset; }; /** @@ -1022,6 +1032,8 @@ struct scmd_priv { * @admin_reply_base: Admin reply queue base virtual address * @admin_reply_dma: Admin reply queue base dma address * @admin_reply_q_in_use: Queue is handled by poll/ISR + * @admin_pend_isr: Count of unprocessed admin ISR/poll calls + * due to another thread processing replies * @ready_timeout: Controller ready timeout * @intr_info: Interrupt cookie pointer * @intr_info_count: Number of interrupt cookies @@ -1090,6 +1102,7 @@ struct scmd_priv { * @ts_update_interval: Timestamp update interval * @reset_in_progress: Reset in progress flag * @unrecoverable: Controller unrecoverable flag + * @io_admin_reset_sync: Manage state of I/O ops during an admin reset process * @prev_reset_result: Result of previous reset * @reset_mutex: Controller reset mutex * @reset_waitq: Controller reset wait queue @@ -1153,6 +1166,12 @@ struct scmd_priv { * @snapdump_trigger_active: Snapdump trigger active flag * @pci_err_recovery: PCI error recovery in progress * @block_on_pci_err: Block IO during PCI error recovery + * @reply_qfull_count: Occurences of reply queue full avoidance kicking-in + * @prevent_reply_qfull: Enable reply queue prevention + * @seg_tb_support: Segmented trace buffer support + * @num_tb_segs: Number of Segments in Trace buffer + * @trace_buf_pool: DMA pool for Segmented trace buffer segments + * @trace_buf: Trace buffer segments memory descriptor */ struct mpi3mr_ioc { struct list_head list; @@ -1189,6 +1208,7 @@ struct mpi3mr_ioc { void *admin_reply_base; dma_addr_t admin_reply_dma; atomic_t admin_reply_q_in_use; + atomic_t admin_pend_isr; u32 ready_timeout; @@ -1276,6 +1296,7 @@ struct mpi3mr_ioc { u16 ts_update_interval; u8 reset_in_progress; u8 unrecoverable; + u8 io_admin_reset_sync; int prev_reset_result; struct mutex reset_mutex; wait_queue_head_t reset_waitq; @@ -1351,6 +1372,13 @@ struct mpi3mr_ioc { bool fw_release_trigger_active; bool pci_err_recovery; bool block_on_pci_err; + atomic_t reply_qfull_count; + bool prevent_reply_qfull; + bool seg_tb_support; + u32 num_tb_segs; + struct dma_pool *trace_buf_pool; + struct segments *trace_buf; + }; /** diff --git a/drivers/scsi/mpi3mr/mpi3mr_app.c b/drivers/scsi/mpi3mr/mpi3mr_app.c index 7589f48aebc8..f36663613950 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_app.c +++ b/drivers/scsi/mpi3mr/mpi3mr_app.c @@ -12,23 +12,98 @@ #include <uapi/scsi/scsi_bsg_mpi3mr.h> /** - * mpi3mr_alloc_trace_buffer: Allocate trace buffer + * mpi3mr_alloc_trace_buffer: Allocate segmented trace buffer * @mrioc: Adapter instance reference * @trace_size: Trace buffer size * - * Allocate trace buffer + * Allocate either segmented memory pools or contiguous buffer + * based on the controller capability for the host trace + * buffer. + * * Return: 0 on success, non-zero on failure. */ static int mpi3mr_alloc_trace_buffer(struct mpi3mr_ioc *mrioc, u32 trace_size) { struct diag_buffer_desc *diag_buffer = &mrioc->diag_buffers[0]; + int i, sz; + u64 *diag_buffer_list = NULL; + dma_addr_t diag_buffer_list_dma; + u32 seg_count; + + if (mrioc->seg_tb_support) { + seg_count = (trace_size) / MPI3MR_PAGE_SIZE_4K; + trace_size = seg_count * MPI3MR_PAGE_SIZE_4K; + + diag_buffer_list = dma_alloc_coherent(&mrioc->pdev->dev, + sizeof(u64) * seg_count, + &diag_buffer_list_dma, GFP_KERNEL); + if (!diag_buffer_list) + return -1; + + mrioc->num_tb_segs = seg_count; + + sz = sizeof(struct segments) * seg_count; + mrioc->trace_buf = kzalloc(sz, GFP_KERNEL); + if (!mrioc->trace_buf) + goto trace_buf_failed; + + mrioc->trace_buf_pool = dma_pool_create("trace_buf pool", + &mrioc->pdev->dev, MPI3MR_PAGE_SIZE_4K, MPI3MR_PAGE_SIZE_4K, + 0); + if (!mrioc->trace_buf_pool) { + ioc_err(mrioc, "trace buf pool: dma_pool_create failed\n"); + goto trace_buf_pool_failed; + } - diag_buffer->addr = dma_alloc_coherent(&mrioc->pdev->dev, - trace_size, &diag_buffer->dma_addr, GFP_KERNEL); - if (diag_buffer->addr) { - dprint_init(mrioc, "trace diag buffer is allocated successfully\n"); + for (i = 0; i < seg_count; i++) { + mrioc->trace_buf[i].segment = + dma_pool_zalloc(mrioc->trace_buf_pool, GFP_KERNEL, + &mrioc->trace_buf[i].segment_dma); + diag_buffer_list[i] = + (u64) mrioc->trace_buf[i].segment_dma; + if (!diag_buffer_list[i]) + goto tb_seg_alloc_failed; + } + + diag_buffer->addr = diag_buffer_list; + diag_buffer->dma_addr = diag_buffer_list_dma; + diag_buffer->is_segmented = true; + + dprint_init(mrioc, "segmented trace diag buffer\n" + "is allocated successfully seg_count:%d\n", seg_count); return 0; + } else { + diag_buffer->addr = dma_alloc_coherent(&mrioc->pdev->dev, + trace_size, &diag_buffer->dma_addr, GFP_KERNEL); + if (diag_buffer->addr) { + dprint_init(mrioc, "trace diag buffer is allocated successfully\n"); + return 0; + } + return -1; } + +tb_seg_alloc_failed: + if (mrioc->trace_buf_pool) { + for (i = 0; i < mrioc->num_tb_segs; i++) { + if (mrioc->trace_buf[i].segment) { + dma_pool_free(mrioc->trace_buf_pool, + mrioc->trace_buf[i].segment, + mrioc->trace_buf[i].segment_dma); + mrioc->trace_buf[i].segment = NULL; + } + mrioc->trace_buf[i].segment = NULL; + } + dma_pool_destroy(mrioc->trace_buf_pool); + mrioc->trace_buf_pool = NULL; + } +trace_buf_pool_failed: + kfree(mrioc->trace_buf); + mrioc->trace_buf = NULL; +trace_buf_failed: + if (diag_buffer_list) + dma_free_coherent(&mrioc->pdev->dev, + sizeof(u64) * mrioc->num_tb_segs, + diag_buffer_list, diag_buffer_list_dma); return -1; } @@ -100,8 +175,9 @@ void mpi3mr_alloc_diag_bufs(struct mpi3mr_ioc *mrioc) dprint_init(mrioc, "trying to allocate trace diag buffer of size = %dKB\n", trace_size / 1024); - if (get_order(trace_size) > MAX_PAGE_ORDER || + if ((!mrioc->seg_tb_support && (get_order(trace_size) > MAX_PAGE_ORDER)) || mpi3mr_alloc_trace_buffer(mrioc, trace_size)) { + retry = true; trace_size -= trace_dec_size; dprint_init(mrioc, "trace diag buffer allocation failed\n" @@ -161,6 +237,12 @@ int mpi3mr_issue_diag_buf_post(struct mpi3mr_ioc *mrioc, u8 prev_status; int retval = 0; + if (diag_buffer->disabled_after_reset) { + dprint_bsg_err(mrioc, "%s: skipping diag buffer posting\n" + "as it is disabled after reset\n", __func__); + return -1; + } + memset(&diag_buf_post_req, 0, sizeof(diag_buf_post_req)); mutex_lock(&mrioc->init_cmds.mutex); if (mrioc->init_cmds.state & MPI3MR_CMD_PENDING) { @@ -177,8 +259,12 @@ int mpi3mr_issue_diag_buf_post(struct mpi3mr_ioc *mrioc, diag_buf_post_req.address = le64_to_cpu(diag_buffer->dma_addr); diag_buf_post_req.length = le32_to_cpu(diag_buffer->size); - dprint_bsg_info(mrioc, "%s: posting diag buffer type %d\n", __func__, - diag_buffer->type); + if (diag_buffer->is_segmented) + diag_buf_post_req.msg_flags |= MPI3_DIAG_BUFFER_POST_MSGFLAGS_SEGMENTED; + + dprint_bsg_info(mrioc, "%s: posting diag buffer type %d segmented:%d\n", __func__, + diag_buffer->type, diag_buffer->is_segmented); + prev_status = diag_buffer->status; diag_buffer->status = MPI3MR_HDB_BUFSTATUS_POSTED_UNPAUSED; init_completion(&mrioc->init_cmds.done); @@ -2339,6 +2425,7 @@ static long mpi3mr_bsg_process_mpt_cmds(struct bsg_job *job) } if (!mrioc->ioctl_sges_allocated) { + mutex_unlock(&mrioc->bsg_cmds.mutex); dprint_bsg_err(mrioc, "%s: DMA memory was not allocated\n", __func__); return -ENOMEM; @@ -3060,6 +3147,29 @@ reply_queue_count_show(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR_RO(reply_queue_count); +/** + * reply_qfull_count_show - Show reply qfull count + * @dev: class device + * @attr: Device attributes + * @buf: Buffer to copy + * + * Retrieves the current value of the reply_qfull_count from the mrioc structure and + * formats it as a string for display. + * + * Return: sysfs_emit() return + */ +static ssize_t +reply_qfull_count_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct mpi3mr_ioc *mrioc = shost_priv(shost); + + return sysfs_emit(buf, "%u\n", atomic_read(&mrioc->reply_qfull_count)); +} + +static DEVICE_ATTR_RO(reply_qfull_count); + /** * logging_level_show - Show controller debug level * @dev: class device @@ -3152,6 +3262,7 @@ static struct attribute *mpi3mr_host_attrs[] = { &dev_attr_fw_queue_depth.attr, &dev_attr_op_req_q_count.attr, &dev_attr_reply_queue_count.attr, + &dev_attr_reply_qfull_count.attr, &dev_attr_logging_level.attr, &dev_attr_adp_state.attr, NULL, diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index 5ed31fe57474..3fcb1ad3b070 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -17,7 +17,7 @@ static void mpi3mr_process_factsdata(struct mpi3mr_ioc *mrioc, struct mpi3_ioc_facts_data *facts_data); static void mpi3mr_pel_wait_complete(struct mpi3mr_ioc *mrioc, struct mpi3mr_drv_cmd *drv_cmd); - +static int mpi3mr_check_op_admin_proc(struct mpi3mr_ioc *mrioc); static int poll_queues; module_param(poll_queues, int, 0444); MODULE_PARM_DESC(poll_queues, "Number of queues for io_uring poll mode. (Range 1 - 126)"); @@ -446,8 +446,10 @@ int mpi3mr_process_admin_reply_q(struct mpi3mr_ioc *mrioc) u16 threshold_comps = 0; struct mpi3_default_reply_descriptor *reply_desc; - if (!atomic_add_unless(&mrioc->admin_reply_q_in_use, 1, 1)) + if (!atomic_add_unless(&mrioc->admin_reply_q_in_use, 1, 1)) { + atomic_inc(&mrioc->admin_pend_isr); return 0; + } reply_desc = (struct mpi3_default_reply_descriptor *)mrioc->admin_reply_base + admin_reply_ci; @@ -459,7 +461,7 @@ int mpi3mr_process_admin_reply_q(struct mpi3mr_ioc *mrioc) } do { - if (mrioc->unrecoverable) + if (mrioc->unrecoverable || mrioc->io_admin_reset_sync) break; mrioc->admin_req_ci = le16_to_cpu(reply_desc->request_queue_ci); @@ -554,7 +556,7 @@ int mpi3mr_process_op_reply_q(struct mpi3mr_ioc *mrioc, } do { - if (mrioc->unrecoverable) + if (mrioc->unrecoverable || mrioc->io_admin_reset_sync) break; req_q_idx = le16_to_cpu(reply_desc->request_queue_id) - 1; @@ -1302,7 +1304,7 @@ static int mpi3mr_issue_and_process_mur(struct mpi3mr_ioc *mrioc, (ioc_config & MPI3_SYSIF_IOC_CONFIG_ENABLE_IOC))) retval = 0; - ioc_info(mrioc, "Base IOC Sts/Config after %s MUR is (0x%x)/(0x%x)\n", + ioc_info(mrioc, "Base IOC Sts/Config after %s MUR is (0x%08x)/(0x%08x)\n", (!retval) ? "successful" : "failed", ioc_status, ioc_config); return retval; } @@ -1355,6 +1357,19 @@ mpi3mr_revalidate_factsdata(struct mpi3mr_ioc *mrioc) "\tcontroller while sas transport support is enabled at the\n" "\tdriver, please reboot the system or reload the driver\n"); + if (mrioc->seg_tb_support) { + if (!(mrioc->facts.ioc_capabilities & + MPI3_IOCFACTS_CAPABILITY_SEG_DIAG_TRACE_SUPPORTED)) { + ioc_err(mrioc, + "critical error: previously enabled segmented trace\n" + " buffer capability is disabled after reset. Please\n" + " update the firmware or reboot the system or\n" + " reload the driver to enable trace diag buffer\n"); + mrioc->diag_buffers[0].disabled_after_reset = true; + } else + mrioc->diag_buffers[0].disabled_after_reset = false; + } + if (mrioc->facts.max_devhandle > mrioc->dev_handle_bitmap_bits) { removepend_bitmap = bitmap_zalloc(mrioc->facts.max_devhandle, GFP_KERNEL); @@ -1717,7 +1732,7 @@ static int mpi3mr_issue_reset(struct mpi3mr_ioc *mrioc, u16 reset_type, ioc_config = readl(&mrioc->sysif_regs->ioc_configuration); ioc_status = readl(&mrioc->sysif_regs->ioc_status); ioc_info(mrioc, - "ioc_status/ioc_onfig after %s reset is (0x%x)/(0x%x)\n", + "ioc_status/ioc_config after %s reset is (0x%08x)/(0x%08x)\n", (!retval)?"successful":"failed", ioc_status, ioc_config); if (retval) @@ -2104,15 +2119,22 @@ static int mpi3mr_create_op_reply_q(struct mpi3mr_ioc *mrioc, u16 qidx) } reply_qid = qidx + 1; - op_reply_q->num_replies = MPI3MR_OP_REP_Q_QD; - if ((mrioc->pdev->device == MPI3_MFGPAGE_DEVID_SAS4116) && - !mrioc->pdev->revision) - op_reply_q->num_replies = MPI3MR_OP_REP_Q_QD4K; + + if (mrioc->pdev->device == MPI3_MFGPAGE_DEVID_SAS4116) { + if (mrioc->pdev->revision) + op_reply_q->num_replies = MPI3MR_OP_REP_Q_QD; + else + op_reply_q->num_replies = MPI3MR_OP_REP_Q_QD4K; + } else + op_reply_q->num_replies = MPI3MR_OP_REP_Q_QD2K; + op_reply_q->ci = 0; op_reply_q->ephase = 1; atomic_set(&op_reply_q->pend_ios, 0); atomic_set(&op_reply_q->in_use, 0); op_reply_q->enable_irq_poll = false; + op_reply_q->qfull_watermark = + op_reply_q->num_replies - (MPI3MR_THRESHOLD_REPLY_COUNT * 2); if (!op_reply_q->q_segments) { retval = mpi3mr_alloc_op_reply_q_segments(mrioc, qidx); @@ -2416,8 +2438,10 @@ int mpi3mr_op_request_post(struct mpi3mr_ioc *mrioc, void *segment_base_addr; u16 req_sz = mrioc->facts.op_req_sz; struct segments *segments = op_req_q->q_segments; + struct op_reply_qinfo *op_reply_q = NULL; reply_qidx = op_req_q->reply_qid - 1; + op_reply_q = mrioc->op_reply_qinfo + reply_qidx; if (mrioc->unrecoverable) return -EFAULT; @@ -2448,6 +2472,15 @@ int mpi3mr_op_request_post(struct mpi3mr_ioc *mrioc, goto out; } + /* Reply queue is nearing to get full, push back IOs to SML */ + if ((mrioc->prevent_reply_qfull == true) && + (atomic_read(&op_reply_q->pend_ios) > + (op_reply_q->qfull_watermark))) { + atomic_inc(&mrioc->reply_qfull_count); + retval = -EAGAIN; + goto out; + } + segment_base_addr = segments[pi / op_req_q->segment_qd].segment; req_entry = (u8 *)segment_base_addr + ((pi % op_req_q->segment_qd) * req_sz); @@ -2726,7 +2759,16 @@ static void mpi3mr_watchdog_work(struct work_struct *work) return; } - if (mrioc->ts_update_counter++ >= mrioc->ts_update_interval) { + if (atomic_read(&mrioc->admin_pend_isr)) { + ioc_err(mrioc, "Unprocessed admin ISR instance found\n" + "flush admin replies\n"); + mpi3mr_process_admin_reply_q(mrioc); + } + + if (!(mrioc->facts.ioc_capabilities & + MPI3_IOCFACTS_CAPABILITY_NON_SUPERVISOR_IOC) && + (mrioc->ts_update_counter++ >= mrioc->ts_update_interval)) { + mrioc->ts_update_counter = 0; mpi3mr_sync_timestamp(mrioc); } @@ -3091,6 +3133,9 @@ static void mpi3mr_process_factsdata(struct mpi3mr_ioc *mrioc, mrioc->facts.dma_mask = (facts_flags & MPI3_IOCFACTS_FLAGS_DMA_ADDRESS_WIDTH_MASK) >> MPI3_IOCFACTS_FLAGS_DMA_ADDRESS_WIDTH_SHIFT; + mrioc->facts.dma_mask = (facts_flags & + MPI3_IOCFACTS_FLAGS_DMA_ADDRESS_WIDTH_MASK) >> + MPI3_IOCFACTS_FLAGS_DMA_ADDRESS_WIDTH_SHIFT; mrioc->facts.protocol_flags = facts_data->protocol_flags; mrioc->facts.mpi_version = le32_to_cpu(facts_data->mpi_version.word); mrioc->facts.max_reqs = le16_to_cpu(facts_data->max_outstanding_requests); @@ -4214,6 +4259,13 @@ int mpi3mr_init_ioc(struct mpi3mr_ioc *mrioc) mrioc->shost->transportt = mpi3mr_transport_template; } + if (mrioc->facts.max_req_limit) + mrioc->prevent_reply_qfull = true; + + if (mrioc->facts.ioc_capabilities & + MPI3_IOCFACTS_CAPABILITY_SEG_DIAG_TRACE_SUPPORTED) + mrioc->seg_tb_support = true; + mrioc->reply_sz = mrioc->facts.reply_sz; retval = mpi3mr_check_reset_dma_mask(mrioc); @@ -4370,6 +4422,7 @@ int mpi3mr_reinit_ioc(struct mpi3mr_ioc *mrioc, u8 is_resume) goto out_failed_noretry; } + mrioc->io_admin_reset_sync = 0; if (is_resume || mrioc->block_on_pci_err) { dprint_reset(mrioc, "setting up single ISR\n"); retval = mpi3mr_setup_isr(mrioc, 1); @@ -4671,7 +4724,7 @@ void mpi3mr_memset_buffers(struct mpi3mr_ioc *mrioc) */ void mpi3mr_free_mem(struct mpi3mr_ioc *mrioc) { - u16 i; + u16 i, j; struct mpi3mr_intr_info *intr_info; struct diag_buffer_desc *diag_buffer; @@ -4806,6 +4859,26 @@ void mpi3mr_free_mem(struct mpi3mr_ioc *mrioc) for (i = 0; i < MPI3MR_MAX_NUM_HDB; i++) { diag_buffer = &mrioc->diag_buffers[i]; + if ((i == 0) && mrioc->seg_tb_support) { + if (mrioc->trace_buf_pool) { + for (j = 0; j < mrioc->num_tb_segs; j++) { + if (mrioc->trace_buf[j].segment) { + dma_pool_free(mrioc->trace_buf_pool, + mrioc->trace_buf[j].segment, + mrioc->trace_buf[j].segment_dma); + mrioc->trace_buf[j].segment = NULL; + } + + mrioc->trace_buf[j].segment = NULL; + } + dma_pool_destroy(mrioc->trace_buf_pool); + mrioc->trace_buf_pool = NULL; + } + + kfree(mrioc->trace_buf); + mrioc->trace_buf = NULL; + diag_buffer->size = sizeof(u64) * mrioc->num_tb_segs; + } if (diag_buffer->addr) { dma_free_coherent(&mrioc->pdev->dev, diag_buffer->size, diag_buffer->addr, @@ -4883,7 +4956,7 @@ static void mpi3mr_issue_ioc_shutdown(struct mpi3mr_ioc *mrioc) } ioc_info(mrioc, - "Base IOC Sts/Config after %s shutdown is (0x%x)/(0x%x)\n", + "Base IOC Sts/Config after %s shutdown is (0x%08x)/(0x%08x)\n", (!retval) ? "successful" : "failed", ioc_status, ioc_config); } @@ -5228,6 +5301,55 @@ void mpi3mr_pel_get_seqnum_complete(struct mpi3mr_ioc *mrioc, drv_cmd->retry_count = 0; } +/** + * mpi3mr_check_op_admin_proc - + * @mrioc: Adapter instance reference + * + * Check if any of the operation reply queues + * or the admin reply queue are currently in use. + * If any queue is in use, this function waits for + * a maximum of 10 seconds for them to become available. + * + * Return: 0 on success, non-zero on failure. + */ +static int mpi3mr_check_op_admin_proc(struct mpi3mr_ioc *mrioc) +{ + + u16 timeout = 10 * 10; + u16 elapsed_time = 0; + bool op_admin_in_use = false; + + do { + op_admin_in_use = false; + + /* Check admin_reply queue first to exit early */ + if (atomic_read(&mrioc->admin_reply_q_in_use) == 1) + op_admin_in_use = true; + else { + /* Check op_reply queues */ + int i; + + for (i = 0; i < mrioc->num_queues; i++) { + if (atomic_read(&mrioc->op_reply_qinfo[i].in_use) == 1) { + op_admin_in_use = true; + break; + } + } + } + + if (!op_admin_in_use) + break; + + msleep(100); + + } while (++elapsed_time < timeout); + + if (op_admin_in_use) + return 1; + + return 0; +} + /** * mpi3mr_soft_reset_handler - Reset the controller * @mrioc: Adapter instance reference @@ -5308,6 +5430,7 @@ int mpi3mr_soft_reset_handler(struct mpi3mr_ioc *mrioc, mpi3mr_wait_for_host_io(mrioc, MPI3MR_RESET_HOST_IOWAIT_TIMEOUT); mpi3mr_ioc_disable_intr(mrioc); + mrioc->io_admin_reset_sync = 1; if (snapdump) { mpi3mr_set_diagsave(mrioc); @@ -5335,6 +5458,16 @@ int mpi3mr_soft_reset_handler(struct mpi3mr_ioc *mrioc, ioc_err(mrioc, "Failed to issue soft reset to the ioc\n"); goto out; } + + retval = mpi3mr_check_op_admin_proc(mrioc); + if (retval) { + ioc_err(mrioc, "Soft reset failed due to an Admin or I/O queue polling\n" + "thread still processing replies even after a 10 second\n" + "timeout. Marking the controller as unrecoverable!\n"); + + goto out; + } + if (mrioc->num_io_throttle_group != mrioc->facts.max_io_throttle_group) { ioc_err(mrioc, diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index b9a51d3f2024..e3547ea42613 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -5803,7 +5803,7 @@ static const struct pci_device_id mpi3mr_pci_id_table[] = { }; MODULE_DEVICE_TABLE(pci, mpi3mr_pci_id_table); -static struct pci_error_handlers mpi3mr_err_handler = { +static const struct pci_error_handlers mpi3mr_err_handler = { .error_detected = mpi3mr_pcierr_error_detected, .mmio_enabled = mpi3mr_pcierr_mmio_enabled, .slot_reset = mpi3mr_pcierr_slot_reset, diff --git a/drivers/scsi/mpt3sas/mpi/mpi2.h b/drivers/scsi/mpt3sas/mpi/mpi2.h index 6de35b32223c..b181b113fc80 100644 --- a/drivers/scsi/mpt3sas/mpi/mpi2.h +++ b/drivers/scsi/mpt3sas/mpi/mpi2.h @@ -125,6 +125,12 @@ * 06-24-19 02.00.55 Bumped MPI2_HEADER_VERSION_UNIT * 08-01-19 02.00.56 Bumped MPI2_HEADER_VERSION_UNIT * 10-02-19 02.00.57 Bumped MPI2_HEADER_VERSION_UNIT + * 07-20-20 02.00.58 Bumped MPI2_HEADER_VERSION_UNIT + * 03-30-21 02.00.59 Bumped MPI2_HEADER_VERSION_UNIT + * 06-03-22 02.00.60 Bumped MPI2_HEADER_VERSION_UNIT + * 09-20-23 02.00.61 Bumped MPI2_HEADER_VERSION_UNIT + * 09-13-24 02.00.62 Bumped MPI2_HEADER_VERSION_UNIT + * Added MPI2_FUNCTION_MCTP_PASSTHROUGH * -------------------------------------------------------------------------- */ @@ -165,7 +171,7 @@ /* Unit and Dev versioning for this MPI header set */ -#define MPI2_HEADER_VERSION_UNIT (0x39) +#define MPI2_HEADER_VERSION_UNIT (0x3E) #define MPI2_HEADER_VERSION_DEV (0x00) #define MPI2_HEADER_VERSION_UNIT_MASK (0xFF00) #define MPI2_HEADER_VERSION_UNIT_SHIFT (8) @@ -669,6 +675,7 @@ typedef union _MPI2_REPLY_DESCRIPTORS_UNION { #define MPI2_FUNCTION_PWR_MGMT_CONTROL (0x30) #define MPI2_FUNCTION_SEND_HOST_MESSAGE (0x31) #define MPI2_FUNCTION_NVME_ENCAPSULATED (0x33) +#define MPI2_FUNCTION_MCTP_PASSTHROUGH (0x34) #define MPI2_FUNCTION_MIN_PRODUCT_SPECIFIC (0xF0) #define MPI2_FUNCTION_MAX_PRODUCT_SPECIFIC (0xFF) diff --git a/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h b/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h index 587f7d248219..77259fc96b94 100644 --- a/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h +++ b/drivers/scsi/mpt3sas/mpi/mpi2_cnfg.h @@ -251,6 +251,7 @@ * 12-17-18 02.00.47 Swap locations of Slotx2 and Slotx4 in ManPage 7. * 08-01-19 02.00.49 Add MPI26_MANPAGE7_FLAG_X2_X4_SLOT_INFO_VALID * Add MPI26_IOUNITPAGE1_NVME_WRCACHE_SHIFT + * 09-13-24 02.00.50 Added PCIe 32 GT/s link rate */ #ifndef MPI2_CNFG_H @@ -1121,6 +1122,7 @@ typedef struct _MPI2_CONFIG_PAGE_IO_UNIT_7 { #define MPI2_IOUNITPAGE7_PCIE_SPEED_5_0_GBPS (0x01) #define MPI2_IOUNITPAGE7_PCIE_SPEED_8_0_GBPS (0x02) #define MPI2_IOUNITPAGE7_PCIE_SPEED_16_0_GBPS (0x03) +#define MPI2_IOUNITPAGE7_PCIE_SPEED_32_0_GBPS (0x04) /*defines for IO Unit Page 7 ProcessorState field */ #define MPI2_IOUNITPAGE7_PSTATE_MASK_SECOND (0x0000000F) @@ -2301,6 +2303,7 @@ typedef struct _MPI2_CONFIG_PAGE_SASIOUNIT_1 { #define MPI2_SASIOUNIT1_CONTROL_CLEAR_AFFILIATION (0x0001) /*values for SAS IO Unit Page 1 AdditionalControlFlags */ +#define MPI2_SASIOUNIT1_ACONTROL_PROD_SPECIFIC_1 (0x8000) #define MPI2_SASIOUNIT1_ACONTROL_DA_PERSIST_CONNECT (0x0100) #define MPI2_SASIOUNIT1_ACONTROL_MULTI_PORT_DOMAIN_ILLEGAL (0x0080) #define MPI2_SASIOUNIT1_ACONTROL_SATA_ASYNCHROUNOUS_NOTIFICATION (0x0040) @@ -3591,6 +3594,7 @@ typedef struct _MPI2_CONFIG_PAGE_EXT_MAN_PS { #define MPI26_PCIE_NEG_LINK_RATE_5_0 (0x03) #define MPI26_PCIE_NEG_LINK_RATE_8_0 (0x04) #define MPI26_PCIE_NEG_LINK_RATE_16_0 (0x05) +#define MPI26_PCIE_NEG_LINK_RATE_32_0 (0x06) /**************************************************************************** @@ -3700,6 +3704,7 @@ typedef struct _MPI26_CONFIG_PAGE_PIOUNIT_1 { #define MPI26_PCIEIOUNIT1_MAX_RATE_5_0 (0x30) #define MPI26_PCIEIOUNIT1_MAX_RATE_8_0 (0x40) #define MPI26_PCIEIOUNIT1_MAX_RATE_16_0 (0x50) +#define MPI26_PCIEIOUNIT1_MAX_RATE_32_0 (0x60) /*values for PCIe IO Unit Page 1 DMDReportPCIe */ #define MPI26_PCIEIOUNIT1_DMDRPT_UNIT_MASK (0x80) diff --git a/drivers/scsi/mpt3sas/mpi/mpi2_ioc.h b/drivers/scsi/mpt3sas/mpi/mpi2_ioc.h index d92852591134..1a279c6e1a9f 100644 --- a/drivers/scsi/mpt3sas/mpi/mpi2_ioc.h +++ b/drivers/scsi/mpt3sas/mpi/mpi2_ioc.h @@ -179,6 +179,7 @@ * Added MPI26_IOCFACTS_CAPABILITY_COREDUMP_ENABLED * Added MPI2_FW_DOWNLOAD_ITYPE_COREDUMP * Added MPI2_FW_UPLOAD_ITYPE_COREDUMP + * 9-13-24 02.00.39 Added MPI26_MCTP_PASSTHROUGH messages * -------------------------------------------------------------------------- */ @@ -382,6 +383,7 @@ typedef struct _MPI2_IOC_FACTS_REPLY { /*ProductID field uses MPI2_FW_HEADER_PID_ */ /*IOCCapabilities */ +#define MPI26_IOCFACTS_CAPABILITY_MCTP_PASSTHRU (0x00800000) #define MPI26_IOCFACTS_CAPABILITY_COREDUMP_ENABLED (0x00200000) #define MPI26_IOCFACTS_CAPABILITY_PCIE_SRIOV (0x00100000) #define MPI26_IOCFACTS_CAPABILITY_ATOMIC_REQ (0x00080000) @@ -1798,5 +1800,57 @@ typedef struct _MPI26_IOUNIT_CONTROL_REPLY { Mpi26IoUnitControlReply_t, *pMpi26IoUnitControlReply_t; +/**************************************************************************** + * MCTP Passthrough messages (MPI v2.6 and later only.) + ****************************************************************************/ + +/* MCTP Passthrough Request Message */ +typedef struct _MPI26_MCTP_PASSTHROUGH_REQUEST { + U8 MsgContext; /* 0x00 */ + U8 Reserved1[2]; /* 0x01 */ + U8 Function; /* 0x03 */ + U8 Reserved2[3]; /* 0x04 */ + U8 MsgFlags; /* 0x07 */ + U8 VP_ID; /* 0x08 */ + U8 VF_ID; /* 0x09 */ + U16 Reserved3; /* 0x0A */ + U32 Reserved4; /* 0x0C */ + U8 Flags; /* 0x10 */ + U8 Reserved5[3]; /* 0x11 */ + U32 Reserved6; /* 0x14 */ + U32 H2DLength; /* 0x18 */ + U32 D2HLength; /* 0x1C */ + MPI25_SGE_IO_UNION H2DSGL; /* 0x20 */ + MPI25_SGE_IO_UNION D2HSGL; /* 0x30 */ +} MPI26_MCTP_PASSTHROUGH_REQUEST, + *PTR_MPI26_MCTP_PASSTHROUGH_REQUEST, + Mpi26MctpPassthroughRequest_t, + *pMpi26MctpPassthroughRequest_t; + +/* values for the MsgContext field */ +#define MPI26_MCTP_MSG_CONEXT_UNUSED (0x00) + +/* values for the Flags field */ +#define MPI26_MCTP_FLAGS_MSG_FORMAT_MPT (0x01) + +/* MCTP Passthrough Reply Message */ +typedef struct _MPI26_MCTP_PASSTHROUGH_REPLY { + U8 MsgContext; /* 0x00 */ + U8 Reserved1; /* 0x01 */ + U8 MsgLength; /* 0x02 */ + U8 Function; /* 0x03 */ + U8 Reserved2[3]; /* 0x04 */ + U8 MsgFlags; /* 0x07 */ + U8 VP_ID; /* 0x08 */ + U8 VF_ID; /* 0x09 */ + U16 Reserved3; /* 0x0A */ + U16 Reserved4; /* 0x0C */ + U16 IOCStatus; /* 0x0E */ + U32 IOCLogInfo; /* 0x10 */ + U32 ResponseDataLength; /* 0x14 */ +} MPI26_MCTP_PASSTHROUGH_REPLY, + *PTR_MPI26_MCTP_PASSTHROUGH_REPLY, + Mpi26MctpPassthroughReply_t, + *pMpi26MctpPassthroughReply_t; #endif diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index dc43cfa83088..bd3efa5b46c7 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -1202,6 +1202,11 @@ _base_sas_ioc_info(struct MPT3SAS_ADAPTER *ioc, MPI2DefaultReply_t *mpi_reply, ioc->sge_size; func_str = "nvme_encapsulated"; break; + case MPI2_FUNCTION_MCTP_PASSTHROUGH: + frame_sz = sizeof(Mpi26MctpPassthroughRequest_t) + + ioc->sge_size; + func_str = "mctp_passthru"; + break; default: frame_sz = 32; func_str = "unknown"; @@ -4874,6 +4879,12 @@ _base_display_ioc_capabilities(struct MPT3SAS_ADAPTER *ioc) i++; } + if (ioc->facts.IOCCapabilities & + MPI26_IOCFACTS_CAPABILITY_MCTP_PASSTHRU) { + pr_cont("%sMCTP Passthru", i ? "," : ""); + i++; + } + iounit_pg1_flags = le32_to_cpu(ioc->iounit_pg1.Flags); if (!(iounit_pg1_flags & MPI2_IOUNITPAGE1_NATIVE_COMMAND_Q_DISABLE)) { pr_cont("%sNCQ", i ? "," : ""); @@ -8018,7 +8029,7 @@ _base_diag_reset(struct MPT3SAS_ADAPTER *ioc) mutex_lock(&ioc->hostdiag_unlock_mutex); if (mpt3sas_base_unlock_and_get_host_diagnostic(ioc, &host_diagnostic)) - goto out; + goto unlock; hcb_size = ioc->base_readl(&ioc->chip->HCBSize); drsprintk(ioc, ioc_info(ioc, "diag reset: issued\n")); @@ -8038,7 +8049,7 @@ _base_diag_reset(struct MPT3SAS_ADAPTER *ioc) ioc_info(ioc, "Invalid host diagnostic register value\n"); _base_dump_reg_set(ioc); - goto out; + goto unlock; } if (!(host_diagnostic & MPI2_DIAG_RESET_ADAPTER)) break; @@ -8074,17 +8085,19 @@ _base_diag_reset(struct MPT3SAS_ADAPTER *ioc) ioc_err(ioc, "%s: failed going to ready state (ioc_state=0x%x)\n", __func__, ioc_state); _base_dump_reg_set(ioc); - goto out; + goto fail; } pci_cfg_access_unlock(ioc->pdev); ioc_info(ioc, "diag reset: SUCCESS\n"); return 0; - out: +unlock: + mutex_unlock(&ioc->hostdiag_unlock_mutex); + +fail: pci_cfg_access_unlock(ioc->pdev); ioc_err(ioc, "diag reset: FAILED\n"); - mutex_unlock(&ioc->hostdiag_unlock_mutex); return -EFAULT; } diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index d8d1a64b4764..939141cde3ca 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -77,8 +77,8 @@ #define MPT3SAS_DRIVER_NAME "mpt3sas" #define MPT3SAS_AUTHOR "Avago Technologies <MPT-FusionLinux.pdl@avagotech.com>" #define MPT3SAS_DESCRIPTION "LSI MPT Fusion SAS 3.0 Device Driver" -#define MPT3SAS_DRIVER_VERSION "51.100.00.00" -#define MPT3SAS_MAJOR_VERSION 51 +#define MPT3SAS_DRIVER_VERSION "52.100.00.00" +#define MPT3SAS_MAJOR_VERSION 52 #define MPT3SAS_MINOR_VERSION 100 #define MPT3SAS_BUILD_VERSION 00 #define MPT3SAS_RELEASE_VERSION 00 @@ -1858,9 +1858,6 @@ int mpt3sas_config_get_manufacturing_pg0(struct MPT3SAS_ADAPTER *ioc, int mpt3sas_config_get_manufacturing_pg1(struct MPT3SAS_ADAPTER *ioc, Mpi2ConfigReply_t *mpi_reply, Mpi2ManufacturingPage1_t *config_page); -int mpt3sas_config_get_manufacturing_pg7(struct MPT3SAS_ADAPTER *ioc, - Mpi2ConfigReply_t *mpi_reply, Mpi2ManufacturingPage7_t *config_page, - u16 sz); int mpt3sas_config_get_manufacturing_pg10(struct MPT3SAS_ADAPTER *ioc, Mpi2ConfigReply_t *mpi_reply, struct Mpi2ManufacturingPage10_t *config_page); @@ -1887,9 +1884,6 @@ int mpt3sas_config_get_iounit_pg0(struct MPT3SAS_ADAPTER *ioc, Mpi2ConfigReply_t int mpt3sas_config_get_sas_device_pg0(struct MPT3SAS_ADAPTER *ioc, Mpi2ConfigReply_t *mpi_reply, Mpi2SasDevicePage0_t *config_page, u32 form, u32 handle); -int mpt3sas_config_get_sas_device_pg1(struct MPT3SAS_ADAPTER *ioc, - Mpi2ConfigReply_t *mpi_reply, Mpi2SasDevicePage1_t *config_page, - u32 form, u32 handle); int mpt3sas_config_get_pcie_device_pg0(struct MPT3SAS_ADAPTER *ioc, Mpi2ConfigReply_t *mpi_reply, Mpi26PCIeDevicePage0_t *config_page, u32 form, u32 handle); diff --git a/drivers/scsi/mpt3sas/mpt3sas_config.c b/drivers/scsi/mpt3sas/mpt3sas_config.c index 2e88f456fc34..45ac853e1289 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_config.c +++ b/drivers/scsi/mpt3sas/mpt3sas_config.c @@ -576,44 +576,6 @@ mpt3sas_config_get_manufacturing_pg1(struct MPT3SAS_ADAPTER *ioc, return r; } -/** - * mpt3sas_config_get_manufacturing_pg7 - obtain manufacturing page 7 - * @ioc: per adapter object - * @mpi_reply: reply mf payload returned from firmware - * @config_page: contents of the config page - * @sz: size of buffer passed in config_page - * Context: sleep. - * - * Return: 0 for success, non-zero for failure. - */ -int -mpt3sas_config_get_manufacturing_pg7(struct MPT3SAS_ADAPTER *ioc, - Mpi2ConfigReply_t *mpi_reply, Mpi2ManufacturingPage7_t *config_page, - u16 sz) -{ - Mpi2ConfigRequest_t mpi_request; - int r; - - memset(&mpi_request, 0, sizeof(Mpi2ConfigRequest_t)); - mpi_request.Function = MPI2_FUNCTION_CONFIG; - mpi_request.Action = MPI2_CONFIG_ACTION_PAGE_HEADER; - mpi_request.Header.PageType = MPI2_CONFIG_PAGETYPE_MANUFACTURING; - mpi_request.Header.PageNumber = 7; - mpi_request.Header.PageVersion = MPI2_MANUFACTURING7_PAGEVERSION; - ioc->build_zero_len_sge_mpi(ioc, &mpi_request.PageBufferSGE); - r = _config_request(ioc, &mpi_request, mpi_reply, - MPT3_CONFIG_PAGE_DEFAULT_TIMEOUT, NULL, 0); - if (r) - goto out; - - mpi_request.Action = MPI2_CONFIG_ACTION_PAGE_READ_CURRENT; - r = _config_request(ioc, &mpi_request, mpi_reply, - MPT3_CONFIG_PAGE_DEFAULT_TIMEOUT, config_page, - sz); - out: - return r; -} - /** * mpt3sas_config_get_manufacturing_pg10 - obtain manufacturing page 10 * @ioc: per adapter object @@ -1213,47 +1175,6 @@ mpt3sas_config_get_sas_device_pg0(struct MPT3SAS_ADAPTER *ioc, return r; } -/** - * mpt3sas_config_get_sas_device_pg1 - obtain sas device page 1 - * @ioc: per adapter object - * @mpi_reply: reply mf payload returned from firmware - * @config_page: contents of the config page - * @form: GET_NEXT_HANDLE or HANDLE - * @handle: device handle - * Context: sleep. - * - * Return: 0 for success, non-zero for failure. - */ -int -mpt3sas_config_get_sas_device_pg1(struct MPT3SAS_ADAPTER *ioc, - Mpi2ConfigReply_t *mpi_reply, Mpi2SasDevicePage1_t *config_page, - u32 form, u32 handle) -{ - Mpi2ConfigRequest_t mpi_request; - int r; - - memset(&mpi_request, 0, sizeof(Mpi2ConfigRequest_t)); - mpi_request.Function = MPI2_FUNCTION_CONFIG; - mpi_request.Action = MPI2_CONFIG_ACTION_PAGE_HEADER; - mpi_request.Header.PageType = MPI2_CONFIG_PAGETYPE_EXTENDED; - mpi_request.ExtPageType = MPI2_CONFIG_EXTPAGETYPE_SAS_DEVICE; - mpi_request.Header.PageVersion = MPI2_SASDEVICE1_PAGEVERSION; - mpi_request.Header.PageNumber = 1; - ioc->build_zero_len_sge_mpi(ioc, &mpi_request.PageBufferSGE); - r = _config_request(ioc, &mpi_request, mpi_reply, - MPT3_CONFIG_PAGE_DEFAULT_TIMEOUT, NULL, 0); - if (r) - goto out; - - mpi_request.PageAddress = cpu_to_le32(form | handle); - mpi_request.Action = MPI2_CONFIG_ACTION_PAGE_READ_CURRENT; - r = _config_request(ioc, &mpi_request, mpi_reply, - MPT3_CONFIG_PAGE_DEFAULT_TIMEOUT, config_page, - sizeof(*config_page)); - out: - return r; -} - /** * mpt3sas_config_get_pcie_device_pg0 - obtain pcie device page 0 * @ioc: per adapter object diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c index 87784c96249a..ff8fedf5f20e 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c @@ -186,6 +186,9 @@ _ctl_display_some_debug(struct MPT3SAS_ADAPTER *ioc, u16 smid, case MPI2_FUNCTION_NVME_ENCAPSULATED: desc = "nvme_encapsulated"; break; + case MPI2_FUNCTION_MCTP_PASSTHROUGH: + desc = "mctp_passthrough"; + break; } if (!desc) @@ -652,6 +655,40 @@ _ctl_set_task_mid(struct MPT3SAS_ADAPTER *ioc, struct mpt3_ioctl_command *karg, return 0; } +/** + * _ctl_send_mctp_passthru_req - Send an MCTP passthru request + * @ioc: per adapter object + * @mctp_passthru_req: MPI mctp passhthru request from caller + * @psge: pointer to the H2DSGL + * @data_out_dma: DMA buffer for H2D SGL + * @data_out_sz: H2D length + * @data_in_dma: DMA buffer for D2H SGL + * @data_in_sz: D2H length + * @smid: SMID to submit the request + * + */ +static void +_ctl_send_mctp_passthru_req( + struct MPT3SAS_ADAPTER *ioc, + Mpi26MctpPassthroughRequest_t *mctp_passthru_req, void *psge, + dma_addr_t data_out_dma, int data_out_sz, + dma_addr_t data_in_dma, int data_in_sz, + u16 smid) +{ + mctp_passthru_req->H2DLength = data_out_sz; + mctp_passthru_req->D2HLength = data_in_sz; + + /* Build the H2D SGL from the data out buffer */ + ioc->build_sg(ioc, psge, data_out_dma, data_out_sz, 0, 0); + + psge += ioc->sge_size_ieee; + + /* Build the D2H SGL for the data in buffer */ + ioc->build_sg(ioc, psge, 0, 0, data_in_dma, data_in_sz); + + ioc->put_smid_default(ioc, smid); +} + /** * _ctl_do_mpt_command - main handler for MPT3COMMAND opcode * @ioc: per adapter object @@ -679,6 +716,7 @@ _ctl_do_mpt_command(struct MPT3SAS_ADAPTER *ioc, struct mpt3_ioctl_command karg, size_t data_in_sz = 0; long ret; u16 device_handle = MPT3SAS_INVALID_DEVICE_HANDLE; + int tm_ret; issue_reset = 0; @@ -792,6 +830,23 @@ _ctl_do_mpt_command(struct MPT3SAS_ADAPTER *ioc, struct mpt3_ioctl_command karg, init_completion(&ioc->ctl_cmds.done); switch (mpi_request->Function) { + case MPI2_FUNCTION_MCTP_PASSTHROUGH: + { + Mpi26MctpPassthroughRequest_t *mctp_passthru_req = + (Mpi26MctpPassthroughRequest_t *)request; + + if (!(ioc->facts.IOCCapabilities & MPI26_IOCFACTS_CAPABILITY_MCTP_PASSTHRU)) { + ioc_err(ioc, "%s: MCTP Passthrough request not supported\n", + __func__); + mpt3sas_base_free_smid(ioc, smid); + ret = -EINVAL; + goto out; + } + + _ctl_send_mctp_passthru_req(ioc, mctp_passthru_req, psge, data_out_dma, + data_out_sz, data_in_dma, data_in_sz, smid); + break; + } case MPI2_FUNCTION_NVME_ENCAPSULATED: { nvme_encap_request = (Mpi26NVMeEncapsulatedRequest_t *)request; @@ -1120,18 +1175,25 @@ _ctl_do_mpt_command(struct MPT3SAS_ADAPTER *ioc, struct mpt3_ioctl_command karg, if (pcie_device && (!ioc->tm_custom_handling) && (!(mpt3sas_scsih_is_pcie_scsi_device( pcie_device->device_info)))) - mpt3sas_scsih_issue_locked_tm(ioc, + tm_ret = mpt3sas_scsih_issue_locked_tm(ioc, le16_to_cpu(mpi_request->FunctionDependent1), 0, 0, 0, MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET, 0, 0, pcie_device->reset_timeout, MPI26_SCSITASKMGMT_MSGFLAGS_PROTOCOL_LVL_RST_PCIE); else - mpt3sas_scsih_issue_locked_tm(ioc, + tm_ret = mpt3sas_scsih_issue_locked_tm(ioc, le16_to_cpu(mpi_request->FunctionDependent1), 0, 0, 0, MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET, 0, 0, 30, MPI2_SCSITASKMGMT_MSGFLAGS_LINK_RESET); + + if (tm_ret != SUCCESS) { + ioc_info(ioc, + "target reset failed, issue hard reset: handle (0x%04x)\n", + le16_to_cpu(mpi_request->FunctionDependent1)); + mpt3sas_base_hard_reset_handler(ioc, FORCE_BIG_HAMMER); + } } else mpt3sas_base_hard_reset_handler(ioc, FORCE_BIG_HAMMER); } @@ -1200,6 +1262,8 @@ _ctl_getiocinfo(struct MPT3SAS_ADAPTER *ioc, void __user *arg) } karg.bios_version = le32_to_cpu(ioc->bios_pg3.BiosVersion); + karg.driver_capability |= MPT3_IOCTL_IOCINFO_DRIVER_CAP_MCTP_PASSTHRU; + if (copy_to_user(arg, &karg, sizeof(karg))) { pr_err("failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); @@ -2786,6 +2850,217 @@ _ctl_ioctl_main(struct file *file, unsigned int cmd, void __user *arg, return ret; } +/** + * _ctl_get_mpt_mctp_passthru_adapter - Traverse the IOC list and return the IOC at + * dev_index positionthat support MCTP passhtru + * @dev_index: position in the mpt3sas_ioc_list to search for + * Return pointer to the IOC on success + * NULL if device not found error + */ +static struct MPT3SAS_ADAPTER * +_ctl_get_mpt_mctp_passthru_adapter(int dev_index) +{ + struct MPT3SAS_ADAPTER *ioc = NULL; + int count = 0; + + spin_lock(&gioc_lock); + /* Traverse ioc list and return number of IOC that support MCTP passthru */ + list_for_each_entry(ioc, &mpt3sas_ioc_list, list) { + if (ioc->facts.IOCCapabilities & MPI26_IOCFACTS_CAPABILITY_MCTP_PASSTHRU) { + if (count == dev_index) { + spin_unlock(&gioc_lock); + return 0; + } + } + } + spin_unlock(&gioc_lock); + + return NULL; +} + +/** + * mpt3sas_get_device_count - Retrieve the count of MCTP passthrough + * capable devices managed by the driver. + * + * Returns number of devices that support MCTP passthrough. + */ +int +mpt3sas_get_device_count(void) +{ + int count = 0; + struct MPT3SAS_ADAPTER *ioc = NULL; + + spin_lock(&gioc_lock); + /* Traverse ioc list and return number of IOC that support MCTP passthru */ + list_for_each_entry(ioc, &mpt3sas_ioc_list, list) + if (ioc->facts.IOCCapabilities & MPI26_IOCFACTS_CAPABILITY_MCTP_PASSTHRU) + count++; + + spin_unlock(&gioc_lock); + + return count; +} +EXPORT_SYMBOL(mpt3sas_get_device_count); + +/** + * mpt3sas_send_passthru_cmd - Send an MPI MCTP passthrough command to + * firmware + * @command: The MPI MCTP passthrough command to send to firmware + * + * Returns 0 on success, anything else is error. + */ +int mpt3sas_send_mctp_passthru_req(struct mpt3_passthru_command *command) +{ + struct MPT3SAS_ADAPTER *ioc; + MPI2RequestHeader_t *mpi_request = NULL, *request; + MPI2DefaultReply_t *mpi_reply; + Mpi26MctpPassthroughRequest_t *mctp_passthru_req; + u16 smid; + unsigned long timeout; + u8 issue_reset = 0; + u32 sz; + void *psge; + void *data_out = NULL; + dma_addr_t data_out_dma = 0; + size_t data_out_sz = 0; + void *data_in = NULL; + dma_addr_t data_in_dma = 0; + size_t data_in_sz = 0; + long ret; + + /* Retrieve ioc from dev_index */ + ioc = _ctl_get_mpt_mctp_passthru_adapter(command->dev_index); + if (!ioc) + return -ENODEV; + + mutex_lock(&ioc->pci_access_mutex); + if (ioc->shost_recovery || + ioc->pci_error_recovery || ioc->is_driver_loading || + ioc->remove_host) { + ret = -EAGAIN; + goto unlock_pci_access; + } + + /* Lock the ctl_cmds mutex to ensure a single ctl cmd is pending */ + if (mutex_lock_interruptible(&ioc->ctl_cmds.mutex)) { + ret = -ERESTARTSYS; + goto unlock_pci_access; + } + + if (ioc->ctl_cmds.status != MPT3_CMD_NOT_USED) { + ioc_err(ioc, "%s: ctl_cmd in use\n", __func__); + ret = -EAGAIN; + goto unlock_ctl_cmds; + } + + ret = mpt3sas_wait_for_ioc(ioc, IOC_OPERATIONAL_WAIT_COUNT); + if (ret) + goto unlock_ctl_cmds; + + mpi_request = (MPI2RequestHeader_t *)command->mpi_request; + if (mpi_request->Function != MPI2_FUNCTION_MCTP_PASSTHROUGH) { + ioc_err(ioc, "%s: Invalid request received, Function 0x%x\n", + __func__, mpi_request->Function); + ret = -EINVAL; + goto unlock_ctl_cmds; + } + + /* Use first reserved smid for passthrough commands */ + smid = ioc->scsiio_depth - INTERNAL_SCSIIO_CMDS_COUNT + 1; + ret = 0; + ioc->ctl_cmds.status = MPT3_CMD_PENDING; + memset(ioc->ctl_cmds.reply, 0, ioc->reply_sz); + request = mpt3sas_base_get_msg_frame(ioc, smid); + memset(request, 0, ioc->request_sz); + memcpy(request, command->mpi_request, sizeof(Mpi26MctpPassthroughRequest_t)); + ioc->ctl_cmds.smid = smid; + data_out_sz = command->data_out_size; + data_in_sz = command->data_in_size; + + /* obtain dma-able memory for data transfer */ + if (data_out_sz) /* WRITE */ { + data_out = dma_alloc_coherent(&ioc->pdev->dev, data_out_sz, + &data_out_dma, GFP_ATOMIC); + if (!data_out) { + ret = -ENOMEM; + mpt3sas_base_free_smid(ioc, smid); + goto out; + } + memcpy(data_out, command->data_out_buf_ptr, data_out_sz); + + } + + if (data_in_sz) /* READ */ { + data_in = dma_alloc_coherent(&ioc->pdev->dev, data_in_sz, + &data_in_dma, GFP_ATOMIC); + if (!data_in) { + ret = -ENOMEM; + mpt3sas_base_free_smid(ioc, smid); + goto out; + } + } + + psge = &((Mpi26MctpPassthroughRequest_t *)request)->H2DSGL; + + init_completion(&ioc->ctl_cmds.done); + + mctp_passthru_req = (Mpi26MctpPassthroughRequest_t *)request; + + _ctl_send_mctp_passthru_req(ioc, mctp_passthru_req, psge, data_out_dma, + data_out_sz, data_in_dma, data_in_sz, smid); + + timeout = command->timeout; + if (timeout < MPT3_IOCTL_DEFAULT_TIMEOUT) + timeout = MPT3_IOCTL_DEFAULT_TIMEOUT; + + wait_for_completion_timeout(&ioc->ctl_cmds.done, timeout*HZ); + if (!(ioc->ctl_cmds.status & MPT3_CMD_COMPLETE)) { + mpt3sas_check_cmd_timeout(ioc, + ioc->ctl_cmds.status, mpi_request, + sizeof(Mpi26MctpPassthroughRequest_t), issue_reset); + goto issue_host_reset; + } + + mpi_reply = ioc->ctl_cmds.reply; + + /* copy out xdata to user */ + if (data_in_sz) + memcpy(command->data_in_buf_ptr, data_in, data_in_sz); + + /* copy out reply message frame to user */ + if (command->max_reply_bytes) { + sz = min_t(u32, command->max_reply_bytes, ioc->reply_sz); + memcpy(command->reply_frame_buf_ptr, ioc->ctl_cmds.reply, sz); + } + +issue_host_reset: + if (issue_reset) { + ret = -ENODATA; + mpt3sas_base_hard_reset_handler(ioc, FORCE_BIG_HAMMER); + } + +out: + /* free memory associated with sg buffers */ + if (data_in) + dma_free_coherent(&ioc->pdev->dev, data_in_sz, data_in, + data_in_dma); + + if (data_out) + dma_free_coherent(&ioc->pdev->dev, data_out_sz, data_out, + data_out_dma); + + ioc->ctl_cmds.status = MPT3_CMD_NOT_USED; + +unlock_ctl_cmds: + mutex_unlock(&ioc->ctl_cmds.mutex); + +unlock_pci_access: + mutex_unlock(&ioc->pci_access_mutex); + return ret; + +} +EXPORT_SYMBOL(mpt3sas_send_mctp_passthru_req); + /** * _ctl_ioctl - mpt3ctl main ioctl entry point (unlocked) * @file: (struct file) diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.h b/drivers/scsi/mpt3sas/mpt3sas_ctl.h index 171709e91006..483e0549c02f 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.h +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.h @@ -160,6 +160,9 @@ struct mpt3_ioctl_pci_info { #define MPT3_IOCTL_INTERFACE_SAS35 (0x07) #define MPT2_IOCTL_VERSION_LENGTH (32) +/* Bits set for mpt3_ioctl_iocinfo.driver_cap */ +#define MPT3_IOCTL_IOCINFO_DRIVER_CAP_MCTP_PASSTHRU 0x1 + /** * struct mpt3_ioctl_iocinfo - generic controller info * @hdr - generic header @@ -175,6 +178,7 @@ struct mpt3_ioctl_pci_info { * @driver_version - driver version - 32 ASCII characters * @rsvd1 - reserved * @scsi_id - scsi id of adapter 0 + * @driver_capability - driver capabilities * @rsvd2 - reserved * @pci_information - pci info (2nd revision) */ @@ -192,7 +196,8 @@ struct mpt3_ioctl_iocinfo { uint8_t driver_version[MPT2_IOCTL_VERSION_LENGTH]; uint8_t rsvd1; uint8_t scsi_id; - uint16_t rsvd2; + uint8_t driver_capability; + uint8_t rsvd2; struct mpt3_ioctl_pci_info pci_information; }; @@ -458,4 +463,46 @@ struct mpt3_enable_diag_sbr_reload { struct mpt3_ioctl_header hdr; }; +/** + * struct mpt3_passthru_command - generic mpt firmware passthru command + * @dev_index - device index + * @timeout - command timeout in seconds. (if zero then use driver default + * value). + * @reply_frame_buf_ptr - MPI reply location + * @data_in_buf_ptr - destination for read + * @data_out_buf_ptr - data source for write + * @max_reply_bytes - maximum number of reply bytes to be sent to app. + * @data_in_size - number bytes for data transfer in (read) + * @data_out_size - number bytes for data transfer out (write) + * @mpi_request - request frame + */ +struct mpt3_passthru_command { + u8 dev_index; + uint32_t timeout; + void *reply_frame_buf_ptr; + void *data_in_buf_ptr; + void *data_out_buf_ptr; + uint32_t max_reply_bytes; + uint32_t data_in_size; + uint32_t data_out_size; + Mpi26MctpPassthroughRequest_t *mpi_request; +}; + +/* + * mpt3sas_get_device_count - Retrieve the count of MCTP passthrough + * capable devices managed by the driver. + * + * Returns number of devices that support MCTP passthrough. + */ +int mpt3sas_get_device_count(void); + +/* + * mpt3sas_send_passthru_cmd - Send an MPI MCTP passthrough command to + * firmware + * @command: The MPI MCTP passthrough command to send to firmware + * + * Returns 0 on success, anything else is error . + */ +int mpt3sas_send_mctp_passthru_req(struct mpt3_passthru_command *command); + #endif /* MPT3SAS_CTL_H_INCLUDED */ diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index a456e5ec74d8..508861e88d9f 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -2703,7 +2703,7 @@ scsih_sdev_configure(struct scsi_device *sdev, struct queue_limits *lim) ssp_target = 1; if (sas_device->device_info & MPI2_SAS_DEVICE_INFO_SEP) { - sdev_printk(KERN_WARNING, sdev, + sdev_printk(KERN_INFO, sdev, "set ignore_delay_remove for handle(0x%04x)\n", sas_device_priv_data->sas_target->handle); sas_device_priv_data->ignore_delay_remove = 1; @@ -12710,7 +12710,7 @@ static const struct pci_device_id mpt3sas_pci_table[] = { }; MODULE_DEVICE_TABLE(pci, mpt3sas_pci_table); -static struct pci_error_handlers _mpt3sas_err_handler = { +static const struct pci_error_handlers _mpt3sas_err_handler = { .error_detected = scsih_pci_error_detected, .mmio_enabled = scsih_pci_mmio_enabled, .slot_reset = scsih_pci_slot_reset, diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c index 1444b1f1c4c8..c4592de4fefc 100644 --- a/drivers/scsi/mvsas/mv_sas.c +++ b/drivers/scsi/mvsas/mv_sas.c @@ -151,16 +151,6 @@ static inline u8 mvs_assign_reg_set(struct mvs_info *mvi, return MVS_CHIP_DISP->assign_reg_set(mvi, &dev->taskfileset); } -void mvs_phys_reset(struct mvs_info *mvi, u32 phy_mask, int hard) -{ - u32 no; - for_each_phy(phy_mask, phy_mask, no) { - if (!(phy_mask & 1)) - continue; - MVS_CHIP_DISP->phy_reset(mvi, no, hard); - } -} - int mvs_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func, void *funcdata) { diff --git a/drivers/scsi/mvsas/mv_sas.h b/drivers/scsi/mvsas/mv_sas.h index 19b01f7c4767..09ce3f2241f2 100644 --- a/drivers/scsi/mvsas/mv_sas.h +++ b/drivers/scsi/mvsas/mv_sas.h @@ -425,7 +425,6 @@ struct mvs_task_exec_info { void mvs_get_sas_addr(void *buf, u32 buflen); void mvs_iounmap(void __iomem *regs); int mvs_ioremap(struct mvs_info *mvi, int bar, int bar_ex); -void mvs_phys_reset(struct mvs_info *mvi, u32 phy_mask, int hard); int mvs_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func, void *funcdata); void mvs_set_sas_addr(struct mvs_info *mvi, int port_id, u32 off_lo, diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index c9539897048a..e87885cc701c 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -2876,7 +2876,7 @@ MODULE_DEVICE_TABLE(pci, qedi_pci_tbl); static enum cpuhp_state qedi_cpuhp_state; -static struct pci_error_handlers qedi_err_handler = { +static const struct pci_error_handlers qedi_err_handler = { .error_detected = qedi_io_error_detected, }; diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c index 1fd2da0264e3..47d74f881948 100644 --- a/drivers/scsi/qla1280.c +++ b/drivers/scsi/qla1280.c @@ -2867,7 +2867,7 @@ qla1280_64bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp) dprintk(3, "S/G Segment phys_addr=%x %x, len=0x%x\n", cpu_to_le32(upper_32_bits(dma_handle)), cpu_to_le32(lower_32_bits(dma_handle)), - cpu_to_le32(sg_dma_len(sg_next(s)))); + cpu_to_le32(sg_dma_len(s))); remseg--; } dprintk(5, "qla1280_64bit_start_scsi: Scatter/gather " diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c index 6d16546e1729..9e7a407ba1b9 100644 --- a/drivers/scsi/qla2xxx/qla_sup.c +++ b/drivers/scsi/qla2xxx/qla_sup.c @@ -2136,8 +2136,8 @@ qla2x00_write_flash_byte(struct qla_hw_data *ha, uint32_t addr, uint8_t data) * @flash_id: Flash ID * * This function polls the device until bit 7 of what is read matches data - * bit 7 or until data bit 5 becomes a 1. If that hapens, the flash ROM timed - * out (a fatal error). The flash book recommeds reading bit 7 again after + * bit 7 or until data bit 5 becomes a 1. If that happens, the flash ROM timed + * out (a fatal error). The flash book recommends reading bit 7 again after * reading bit 5 as a 1. * * Returns 0 on success, else non-zero. diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 5ceaa4665e5d..2208dcba346e 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -71,6 +71,10 @@ static const char *sdebug_version_date = "20210520"; #define NO_ADDITIONAL_SENSE 0x0 #define OVERLAP_ATOMIC_COMMAND_ASC 0x0 #define OVERLAP_ATOMIC_COMMAND_ASCQ 0x23 +#define FILEMARK_DETECTED_ASCQ 0x1 +#define EOP_EOM_DETECTED_ASCQ 0x2 +#define BEGINNING_OF_P_M_DETECTED_ASCQ 0x4 +#define EOD_DETECTED_ASCQ 0x5 #define LOGICAL_UNIT_NOT_READY 0x4 #define LOGICAL_UNIT_COMMUNICATION_FAILURE 0x8 #define UNRECOVERED_READ_ERR 0x11 @@ -80,8 +84,10 @@ static const char *sdebug_version_date = "20210520"; #define INVALID_FIELD_IN_CDB 0x24 #define INVALID_FIELD_IN_PARAM_LIST 0x26 #define WRITE_PROTECTED 0x27 +#define UA_READY_ASC 0x28 #define UA_RESET_ASC 0x29 #define UA_CHANGED_ASC 0x2a +#define TOO_MANY_IN_PARTITION_ASC 0x3b #define TARGET_CHANGED_ASC 0x3f #define LUNS_CHANGED_ASCQ 0x0e #define INSUFF_RES_ASC 0x55 @@ -173,6 +179,37 @@ static const char *sdebug_version_date = "20210520"; #define DEF_ZBC_MAX_OPEN_ZONES 8 #define DEF_ZBC_NR_CONV_ZONES 1 +/* Default parameters for tape drives */ +#define TAPE_DEF_DENSITY 0x0 +#define TAPE_BAD_DENSITY 0x65 +#define TAPE_DEF_BLKSIZE 0 +#define TAPE_MIN_BLKSIZE 512 +#define TAPE_MAX_BLKSIZE 1048576 +#define TAPE_EW 20 +#define TAPE_MAX_PARTITIONS 2 +#define TAPE_UNITS 10000 +#define TAPE_PARTITION_1_UNITS 1000 + +/* The tape block data definitions */ +#define TAPE_BLOCK_FM_FLAG ((u32)0x1 << 30) +#define TAPE_BLOCK_EOD_FLAG ((u32)0x2 << 30) +#define TAPE_BLOCK_MARK_MASK ((u32)0x3 << 30) +#define TAPE_BLOCK_SIZE_MASK (~TAPE_BLOCK_MARK_MASK) +#define TAPE_BLOCK_MARK(a) (a & TAPE_BLOCK_MARK_MASK) +#define TAPE_BLOCK_SIZE(a) (a & TAPE_BLOCK_SIZE_MASK) +#define IS_TAPE_BLOCK_FM(a) ((a & TAPE_BLOCK_FM_FLAG) != 0) +#define IS_TAPE_BLOCK_EOD(a) ((a & TAPE_BLOCK_EOD_FLAG) != 0) + +struct tape_block { + u32 fl_size; + unsigned char data[4]; +}; + +/* Flags for sense data */ +#define SENSE_FLAG_FILEMARK 0x80 +#define SENSE_FLAG_EOM 0x40 +#define SENSE_FLAG_ILI 0x20 + #define SDEBUG_LUN_0_VAL 0 /* bit mask values for sdebug_opts */ @@ -216,7 +253,8 @@ static const char *sdebug_version_date = "20210520"; #define SDEBUG_UA_LUNS_CHANGED 5 #define SDEBUG_UA_MICROCODE_CHANGED 6 /* simulate firmware change */ #define SDEBUG_UA_MICROCODE_CHANGED_WO_RESET 7 -#define SDEBUG_NUM_UAS 8 +#define SDEBUG_UA_NOT_READY_TO_READY 8 +#define SDEBUG_NUM_UAS 9 /* when 1==SDEBUG_OPT_MEDIUM_ERR, a medium error is simulated at this * sector on read commands: */ @@ -262,11 +300,6 @@ static const char *sdebug_version_date = "20210520"; #define SDEB_XA_NOT_IN_USE XA_MARK_1 -static struct kmem_cache *queued_cmd_cache; - -#define TO_QUEUED_CMD(scmd) ((void *)(scmd)->host_scribble) -#define ASSIGN_QUEUED_CMD(scmnd, qc) { (scmnd)->host_scribble = (void *) qc; } - /* Zone types (zbcr05 table 25) */ enum sdebug_z_type { ZBC_ZTYPE_CNV = 0x1, @@ -363,6 +396,19 @@ struct sdebug_dev_info { ktime_t create_ts; /* time since bootup that this device was created */ struct sdeb_zone_state *zstate; + /* For tapes */ + unsigned int tape_blksize; + unsigned int tape_density; + unsigned char tape_partition; + unsigned char tape_nbr_partitions; + unsigned char tape_pending_nbr_partitions; + unsigned int tape_pending_part_0_size; + unsigned int tape_pending_part_1_size; + unsigned char tape_dce; + unsigned int tape_location[TAPE_MAX_PARTITIONS]; + unsigned int tape_eop[TAPE_MAX_PARTITIONS]; + struct tape_block *tape_blocks[TAPE_MAX_PARTITIONS]; + struct dentry *debugfs_entry; struct spinlock list_lock; struct list_head inject_err_list; @@ -409,24 +455,9 @@ struct sdebug_defer { enum sdeb_defer_type defer_t; }; -struct sdebug_device_access_info { - bool atomic_write; - u64 lba; - u32 num; - struct scsi_cmnd *self; -}; - -struct sdebug_queued_cmd { - /* corresponding bit set in in_use_bm[] in owning struct sdebug_queue - * instance indicates this slot is in use. - */ - struct sdebug_defer sd_dp; - struct scsi_cmnd *scmd; - struct sdebug_device_access_info *i; -}; - struct sdebug_scsi_cmd { spinlock_t lock; + struct sdebug_defer sd_dp; }; static atomic_t sdebug_cmnd_count; /* number of incoming commands */ @@ -483,22 +514,27 @@ enum sdeb_opcode_index { SDEB_I_ZONE_OUT = 30, /* 0x94+SA; includes no data xfer */ SDEB_I_ZONE_IN = 31, /* 0x95+SA; all have data-in */ SDEB_I_ATOMIC_WRITE_16 = 32, - SDEB_I_LAST_ELEM_P1 = 33, /* keep this last (previous + 1) */ + SDEB_I_READ_BLOCK_LIMITS = 33, + SDEB_I_LOCATE = 34, + SDEB_I_WRITE_FILEMARKS = 35, + SDEB_I_SPACE = 36, + SDEB_I_FORMAT_MEDIUM = 37, + SDEB_I_LAST_ELEM_P1 = 38, /* keep this last (previous + 1) */ }; static const unsigned char opcode_ind_arr[256] = { /* 0x0; 0x0->0x1f: 6 byte cdbs */ SDEB_I_TEST_UNIT_READY, SDEB_I_REZERO_UNIT, 0, SDEB_I_REQUEST_SENSE, - 0, 0, 0, 0, + SDEB_I_FORMAT_MEDIUM, SDEB_I_READ_BLOCK_LIMITS, 0, 0, SDEB_I_READ, 0, SDEB_I_WRITE, 0, 0, 0, 0, 0, - 0, 0, SDEB_I_INQUIRY, 0, 0, SDEB_I_MODE_SELECT, SDEB_I_RESERVE, - SDEB_I_RELEASE, + SDEB_I_WRITE_FILEMARKS, SDEB_I_SPACE, SDEB_I_INQUIRY, 0, 0, + SDEB_I_MODE_SELECT, SDEB_I_RESERVE, SDEB_I_RELEASE, 0, 0, SDEB_I_MODE_SENSE, SDEB_I_START_STOP, 0, SDEB_I_SEND_DIAG, SDEB_I_ALLOW_REMOVAL, 0, /* 0x20; 0x20->0x3f: 10 byte cdbs */ 0, 0, 0, 0, 0, SDEB_I_READ_CAPACITY, 0, 0, - SDEB_I_READ, 0, SDEB_I_WRITE, 0, 0, 0, 0, SDEB_I_VERIFY, + SDEB_I_READ, 0, SDEB_I_WRITE, SDEB_I_LOCATE, 0, 0, 0, SDEB_I_VERIFY, 0, 0, 0, 0, SDEB_I_PRE_FETCH, SDEB_I_SYNC_CACHE, 0, 0, 0, 0, 0, SDEB_I_WRITE_BUFFER, 0, 0, 0, 0, /* 0x40; 0x40->0x5f: 10 byte cdbs */ @@ -573,6 +609,12 @@ static int resp_open_zone(struct scsi_cmnd *, struct sdebug_dev_info *); static int resp_close_zone(struct scsi_cmnd *, struct sdebug_dev_info *); static int resp_finish_zone(struct scsi_cmnd *, struct sdebug_dev_info *); static int resp_rwp_zone(struct scsi_cmnd *, struct sdebug_dev_info *); +static int resp_read_blklimits(struct scsi_cmnd *, struct sdebug_dev_info *); +static int resp_locate(struct scsi_cmnd *, struct sdebug_dev_info *); +static int resp_write_filemarks(struct scsi_cmnd *, struct sdebug_dev_info *); +static int resp_space(struct scsi_cmnd *, struct sdebug_dev_info *); +static int resp_rewind(struct scsi_cmnd *, struct sdebug_dev_info *); +static int resp_format_medium(struct scsi_cmnd *, struct sdebug_dev_info *); static int sdebug_do_add_host(bool mk_new_store); static int sdebug_add_host_helper(int per_host_idx); @@ -581,8 +623,6 @@ static int sdebug_add_store(void); static void sdebug_erase_store(int idx, struct sdeb_store_info *sip); static void sdebug_erase_all_stores(bool apart_from_first); -static void sdebug_free_queued_cmd(struct sdebug_queued_cmd *sqcp); - /* * The following are overflow arrays for cdbs that "hit" the same index in * the opcode_info_arr array. The most time sensitive (or commonly used) cdb @@ -773,7 +813,7 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEM_P1 + 1] = { /* 20 */ {0, 0x1e, 0, 0, NULL, NULL, /* ALLOW REMOVAL */ {6, 0, 0, 0, 0x3, 0xc7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, - {0, 0x1, 0, 0, resp_start_stop, NULL, /* REWIND ?? */ + {0, 0x1, 0, 0, resp_rewind, NULL, {6, 0x1, 0, 0, 0, 0xc7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, {0, 0, 0, F_INV_OP | FF_RESPOND, NULL, NULL, /* ATA_PT */ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, @@ -800,6 +840,7 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEM_P1 + 1] = { resp_pre_fetch, pre_fetch_iarr, {10, 0x2, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, 0xc7, 0, 0, 0, 0, 0, 0} }, /* PRE-FETCH (10) */ + /* READ POSITION (10) */ /* 30 */ {ARRAY_SIZE(zone_out_iarr), 0x94, 0x3, F_SA_LOW | F_M_ACCESS, @@ -810,11 +851,23 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEM_P1 + 1] = { resp_report_zones, zone_in_iarr, /* ZONE_IN(16), REPORT ZONES) */ {16, 0x0 /* SA */, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xbf, 0xc7} }, -/* 31 */ +/* 32 */ {0, 0x0, 0x0, F_D_OUT | FF_MEDIA_IO, resp_atomic_write, NULL, /* ATOMIC WRITE 16 */ {16, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff} }, + {0, 0x05, 0, F_D_IN, resp_read_blklimits, NULL, /* READ BLOCK LIMITS (6) */ + {6, 0, 0, 0, 0, 0xc7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, + {0, 0x2b, 0, F_D_UNKN, resp_locate, NULL, /* LOCATE (10) */ + {10, 0x07, 0, 0xff, 0xff, 0xff, 0xff, 0, 0xff, 0xc7, 0, 0, + 0, 0, 0, 0} }, + {0, 0x10, 0, F_D_IN, resp_write_filemarks, NULL, /* WRITE FILEMARKS (6) */ + {6, 0x01, 0xff, 0xff, 0xff, 0xc7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, + {0, 0x11, 0, F_D_IN, resp_space, NULL, /* SPACE (6) */ + {6, 0x07, 0xff, 0xff, 0xff, 0xc7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, + {0, 0x4, 0, 0, resp_format_medium, NULL, /* FORMAT MEDIUM (6) */ + {6, 0x3, 0x7, 0, 0, 0xc7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, +/* 38 */ /* sentinel */ {0xff, 0, 0, 0, NULL, NULL, /* terminating element */ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, @@ -1331,6 +1384,30 @@ static void mk_sense_buffer(struct scsi_cmnd *scp, int key, int asc, int asq) my_name, key, asc, asq); } +/* Sense data that has information fields for tapes */ +static void mk_sense_info_tape(struct scsi_cmnd *scp, int key, int asc, int asq, + unsigned int information, unsigned char tape_flags) +{ + if (!scp->sense_buffer) { + sdev_printk(KERN_ERR, scp->device, + "%s: sense_buffer is NULL\n", __func__); + return; + } + memset(scp->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); + + scsi_build_sense(scp, /* sdebug_dsense */ 0, key, asc, asq); + /* only fixed format so far */ + + scp->sense_buffer[0] |= 0x80; /* valid */ + scp->sense_buffer[2] |= tape_flags; + put_unaligned_be32(information, &scp->sense_buffer[3]); + + if (sdebug_verbose) + sdev_printk(KERN_INFO, scp->device, + "%s: [sense_key,asc,ascq]: [0x%x,0x%x,0x%x]\n", + my_name, key, asc, asq); +} + static void mk_sense_invalid_opcode(struct scsi_cmnd *scp) { mk_sense_buffer(scp, ILLEGAL_REQUEST, INVALID_OPCODE, 0); @@ -1493,6 +1570,12 @@ static int make_ua(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) if (sdebug_verbose) cp = "reported luns data has changed"; break; + case SDEBUG_UA_NOT_READY_TO_READY: + mk_sense_buffer(scp, UNIT_ATTENTION, UA_READY_ASC, + 0); + if (sdebug_verbose) + cp = "not ready to ready transition/media change"; + break; default: pr_warn("unexpected unit attention code=%d\n", k); if (sdebug_verbose) @@ -2196,6 +2279,14 @@ static int resp_start_stop(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) changing = (stopped_state != want_stop); if (changing) atomic_xchg(&devip->stopped, want_stop); + if (sdebug_ptype == TYPE_TAPE && !want_stop) { + int i; + + set_bit(SDEBUG_UA_NOT_READY_TO_READY, devip->uas_bm); /* not legal! */ + for (i = 0; i < TAPE_MAX_PARTITIONS; i++) + devip->tape_location[i] = 0; + devip->tape_partition = 0; + } if (!changing || (cmd[1] & 0x1)) /* state unchanged or IMMED bit set in cdb */ return SDEG_RES_IMMED_MASK; else @@ -2728,6 +2819,76 @@ static int resp_sas_sha_m_spg(unsigned char *p, int pcontrol) return sizeof(sas_sha_m_pg); } +static unsigned char partition_pg[] = {0x11, 12, 1, 0, 0x24, 3, 9, 0, + 0xff, 0xff, 0x00, 0x00}; + +static int resp_partition_m_pg(unsigned char *p, int pcontrol, int target) +{ /* Partition page for mode_sense (tape) */ + memcpy(p, partition_pg, sizeof(partition_pg)); + if (pcontrol == 1) + memset(p + 2, 0, sizeof(partition_pg) - 2); + return sizeof(partition_pg); +} + +static int process_medium_part_m_pg(struct sdebug_dev_info *devip, + unsigned char *new, int pg_len) +{ + int new_nbr, p0_size, p1_size; + + if ((new[4] & 0x80) != 0) { /* FDP */ + partition_pg[4] |= 0x80; + devip->tape_pending_nbr_partitions = TAPE_MAX_PARTITIONS; + devip->tape_pending_part_0_size = TAPE_UNITS - TAPE_PARTITION_1_UNITS; + devip->tape_pending_part_1_size = TAPE_PARTITION_1_UNITS; + } else { + new_nbr = new[3] + 1; + if (new_nbr > TAPE_MAX_PARTITIONS) + return 3; + if ((new[4] & 0x40) != 0) { /* SDP */ + p1_size = TAPE_PARTITION_1_UNITS; + p0_size = TAPE_UNITS - p1_size; + if (p0_size < 100) + return 4; + } else if ((new[4] & 0x20) != 0) { + if (new_nbr > 1) { + p0_size = get_unaligned_be16(new + 8); + p1_size = get_unaligned_be16(new + 10); + if (p1_size == 0xFFFF) + p1_size = TAPE_UNITS - p0_size; + else if (p0_size == 0xFFFF) + p0_size = TAPE_UNITS - p1_size; + if (p0_size < 100 || p1_size < 100) + return 8; + } else { + p0_size = TAPE_UNITS; + p1_size = 0; + } + } else + return 6; + devip->tape_pending_nbr_partitions = new_nbr; + devip->tape_pending_part_0_size = p0_size; + devip->tape_pending_part_1_size = p1_size; + partition_pg[3] = new_nbr; + devip->tape_pending_nbr_partitions = new_nbr; + } + + return 0; +} + +static int resp_compression_m_pg(unsigned char *p, int pcontrol, int target, + unsigned char dce) +{ /* Compression page for mode_sense (tape) */ + unsigned char compression_pg[] = {0x0f, 14, 0x40, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 00, 00}; + + memcpy(p, compression_pg, sizeof(compression_pg)); + if (dce) + p[2] |= 0x80; + if (pcontrol == 1) + memset(p + 2, 0, sizeof(compression_pg) - 2); + return sizeof(compression_pg); +} + /* PAGE_SIZE is more than necessary but provides room for future expansion. */ #define SDEBUG_MAX_MSENSE_SZ PAGE_SIZE @@ -2742,7 +2903,7 @@ static int resp_mode_sense(struct scsi_cmnd *scp, unsigned char *ap; unsigned char *arr __free(kfree); unsigned char *cmd = scp->cmnd; - bool dbd, llbaa, msense_6, is_disk, is_zbc; + bool dbd, llbaa, msense_6, is_disk, is_zbc, is_tape; arr = kzalloc(SDEBUG_MAX_MSENSE_SZ, GFP_ATOMIC); if (!arr) @@ -2755,7 +2916,8 @@ static int resp_mode_sense(struct scsi_cmnd *scp, llbaa = msense_6 ? false : !!(cmd[1] & 0x10); is_disk = (sdebug_ptype == TYPE_DISK); is_zbc = devip->zoned; - if ((is_disk || is_zbc) && !dbd) + is_tape = (sdebug_ptype == TYPE_TAPE); + if ((is_disk || is_zbc || is_tape) && !dbd) bd_len = llbaa ? 16 : 8; else bd_len = 0; @@ -2793,15 +2955,25 @@ static int resp_mode_sense(struct scsi_cmnd *scp, put_unaligned_be32(0xffffffff, ap + 0); else put_unaligned_be32(sdebug_capacity, ap + 0); - put_unaligned_be16(sdebug_sector_size, ap + 6); + if (is_tape) { + ap[0] = devip->tape_density; + put_unaligned_be16(devip->tape_blksize, ap + 6); + } else + put_unaligned_be16(sdebug_sector_size, ap + 6); offset += bd_len; ap = arr + offset; } else if (16 == bd_len) { + if (is_tape) { + mk_sense_invalid_fld(scp, SDEB_IN_DATA, 1, 4); + return check_condition_result; + } put_unaligned_be64((u64)sdebug_capacity, ap + 0); put_unaligned_be32(sdebug_sector_size, ap + 12); offset += bd_len; ap = arr + offset; } + if (cmd[2] == 0) + goto only_bd; /* Only block descriptor requested */ /* * N.B. If len>0 before resp_*_pg() call, then form of that call should be: @@ -2857,6 +3029,18 @@ static int resp_mode_sense(struct scsi_cmnd *scp, } offset += len; break; + case 0xf: /* Compression Mode Page (tape) */ + if (!is_tape) + goto bad_pcode; + len += resp_compression_m_pg(ap, pcontrol, target, devip->tape_dce); + offset += len; + break; + case 0x11: /* Partition Mode Page (tape) */ + if (!is_tape) + goto bad_pcode; + len = resp_partition_m_pg(ap, pcontrol, target); + offset += len; + break; case 0x19: /* if spc==1 then sas phy, control+discover */ if (subpcode > 0x2 && subpcode < 0xff) goto bad_subpcode; @@ -2902,6 +3086,7 @@ static int resp_mode_sense(struct scsi_cmnd *scp, default: goto bad_pcode; } +only_bd: if (msense_6) arr[0] = offset - 1; else @@ -2945,8 +3130,34 @@ static int resp_mode_select(struct scsi_cmnd *scp, __func__, param_len, res); md_len = mselect6 ? (arr[0] + 1) : (get_unaligned_be16(arr + 0) + 2); bd_len = mselect6 ? arr[3] : get_unaligned_be16(arr + 6); - off = bd_len + (mselect6 ? 4 : 8); - if (md_len > 2 || off >= res) { + off = (mselect6 ? 4 : 8); + if (sdebug_ptype == TYPE_TAPE) { + int blksize; + + if (bd_len != 8) { + mk_sense_invalid_fld(scp, SDEB_IN_DATA, + mselect6 ? 3 : 6, -1); + return check_condition_result; + } + if (arr[off] == TAPE_BAD_DENSITY) { + mk_sense_invalid_fld(scp, SDEB_IN_DATA, 0, -1); + return check_condition_result; + } + blksize = get_unaligned_be16(arr + off + 6); + if (blksize != 0 && + (blksize < TAPE_MIN_BLKSIZE || + blksize > TAPE_MAX_BLKSIZE || + (blksize % 4) != 0)) { + mk_sense_invalid_fld(scp, SDEB_IN_DATA, 1, -1); + return check_condition_result; + } + devip->tape_density = arr[off]; + devip->tape_blksize = blksize; + } + off += bd_len; + if (off >= res) + return 0; /* No page written, just descriptors */ + if (md_len > 2) { mk_sense_invalid_fld(scp, SDEB_IN_DATA, 0, -1); return check_condition_result; } @@ -2984,6 +3195,25 @@ static int resp_mode_select(struct scsi_cmnd *scp, goto set_mode_changed_ua; } break; + case 0xf: /* Compression mode page */ + if (sdebug_ptype != TYPE_TAPE) + goto bad_pcode; + if ((arr[off + 2] & 0x40) != 0) { + devip->tape_dce = (arr[off + 2] & 0x80) != 0; + return 0; + } + break; + case 0x11: /* Medium Partition Mode Page (tape) */ + if (sdebug_ptype == TYPE_TAPE) { + int fld; + + fld = process_medium_part_m_pg(devip, &arr[off], pg_len); + if (fld == 0) + return 0; + mk_sense_invalid_fld(scp, SDEB_IN_DATA, fld, -1); + return check_condition_result; + } + break; case 0x1c: /* Informational Exceptions Mode page */ if (iec_m_pg[1] == arr[off + 1]) { memcpy(iec_m_pg + 2, arr + off + 2, @@ -2999,6 +3229,10 @@ static int resp_mode_select(struct scsi_cmnd *scp, set_mode_changed_ua: set_bit(SDEBUG_UA_MODE_CHANGED, devip->uas_bm); return 0; + +bad_pcode: + mk_sense_invalid_fld(scp, SDEB_IN_CDB, 2, 5); + return check_condition_result; } static int resp_temp_l_pg(unsigned char *arr) @@ -3138,6 +3372,265 @@ static int resp_log_sense(struct scsi_cmnd *scp, min_t(u32, len, SDEBUG_MAX_INQ_ARR_SZ)); } +enum {SDEBUG_READ_BLOCK_LIMITS_ARR_SZ = 6}; +static int resp_read_blklimits(struct scsi_cmnd *scp, + struct sdebug_dev_info *devip) +{ + unsigned char arr[SDEBUG_READ_BLOCK_LIMITS_ARR_SZ]; + + arr[0] = 4; + put_unaligned_be24(TAPE_MAX_BLKSIZE, arr + 1); + put_unaligned_be16(TAPE_MIN_BLKSIZE, arr + 4); + return fill_from_dev_buffer(scp, arr, SDEBUG_READ_BLOCK_LIMITS_ARR_SZ); +} + +static int resp_locate(struct scsi_cmnd *scp, + struct sdebug_dev_info *devip) +{ + unsigned char *cmd = scp->cmnd; + unsigned int i, pos; + struct tape_block *blp; + int partition; + + if ((cmd[1] & 0x02) != 0) { + if (cmd[8] >= devip->tape_nbr_partitions) { + mk_sense_invalid_fld(scp, SDEB_IN_CDB, 8, -1); + return check_condition_result; + } + devip->tape_partition = cmd[8]; + } + pos = get_unaligned_be32(cmd + 3); + partition = devip->tape_partition; + + for (i = 0, blp = devip->tape_blocks[partition]; + i < pos && i < devip->tape_eop[partition]; i++, blp++) + if (IS_TAPE_BLOCK_EOD(blp->fl_size)) + break; + if (i < pos) { + devip->tape_location[partition] = i; + mk_sense_buffer(scp, BLANK_CHECK, 0x05, 0); + return check_condition_result; + } + devip->tape_location[partition] = pos; + + return 0; +} + +static int resp_write_filemarks(struct scsi_cmnd *scp, + struct sdebug_dev_info *devip) +{ + unsigned char *cmd = scp->cmnd; + unsigned int i, count, pos; + u32 data; + int partition = devip->tape_partition; + + if ((cmd[1] & 0xfe) != 0) { /* probably write setmarks, not in >= SCSI-3 */ + mk_sense_invalid_fld(scp, SDEB_IN_CDB, 1, 1); + return check_condition_result; + } + count = get_unaligned_be24(cmd + 2); + data = TAPE_BLOCK_FM_FLAG; + for (i = 0, pos = devip->tape_location[partition]; i < count; i++, pos++) { + if (pos >= devip->tape_eop[partition] - 1) { /* don't overwrite EOD */ + devip->tape_location[partition] = devip->tape_eop[partition] - 1; + mk_sense_info_tape(scp, VOLUME_OVERFLOW, NO_ADDITIONAL_SENSE, + EOP_EOM_DETECTED_ASCQ, count, SENSE_FLAG_EOM); + return check_condition_result; + } + (devip->tape_blocks[partition] + pos)->fl_size = data; + } + (devip->tape_blocks[partition] + pos)->fl_size = + TAPE_BLOCK_EOD_FLAG; + devip->tape_location[partition] = pos; + + return 0; +} + +static int resp_space(struct scsi_cmnd *scp, + struct sdebug_dev_info *devip) +{ + unsigned char *cmd = scp->cmnd, code; + int i = 0, pos, count; + struct tape_block *blp; + int partition = devip->tape_partition; + + count = get_unaligned_be24(cmd + 2); + if ((count & 0x800000) != 0) /* extend negative to 32-bit count */ + count |= 0xff000000; + code = cmd[1] & 0x0f; + + pos = devip->tape_location[partition]; + if (code == 0) { /* blocks */ + if (count < 0) { + count = (-count); + pos -= 1; + for (i = 0, blp = devip->tape_blocks[partition] + pos; i < count; + i++) { + if (pos < 0) + goto is_bop; + else if (IS_TAPE_BLOCK_FM(blp->fl_size)) + goto is_fm; + if (i > 0) { + pos--; + blp--; + } + } + } else if (count > 0) { + for (i = 0, blp = devip->tape_blocks[partition] + pos; i < count; + i++, pos++, blp++) { + if (IS_TAPE_BLOCK_EOD(blp->fl_size)) + goto is_eod; + if (IS_TAPE_BLOCK_FM(blp->fl_size)) { + pos += 1; + goto is_fm; + } + if (pos >= devip->tape_eop[partition]) + goto is_eop; + } + } + } else if (code == 1) { /* filemarks */ + if (count < 0) { + count = (-count); + if (pos == 0) + goto is_bop; + else { + for (i = 0, blp = devip->tape_blocks[partition] + pos; + i < count && pos >= 0; i++, pos--, blp--) { + for (pos--, blp-- ; !IS_TAPE_BLOCK_FM(blp->fl_size) && + pos >= 0; pos--, blp--) + ; /* empty */ + if (pos < 0) + goto is_bop; + } + } + pos += 1; + } else if (count > 0) { + for (i = 0, blp = devip->tape_blocks[partition] + pos; + i < count; i++, pos++, blp++) { + for ( ; !IS_TAPE_BLOCK_FM(blp->fl_size) && + !IS_TAPE_BLOCK_EOD(blp->fl_size) && + pos < devip->tape_eop[partition]; + pos++, blp++) + ; /* empty */ + if (IS_TAPE_BLOCK_EOD(blp->fl_size)) + goto is_eod; + if (pos >= devip->tape_eop[partition]) + goto is_eop; + } + } + } else if (code == 3) { /* EOD */ + for (blp = devip->tape_blocks[partition] + pos; + !IS_TAPE_BLOCK_EOD(blp->fl_size) && pos < devip->tape_eop[partition]; + pos++, blp++) + ; /* empty */ + if (pos >= devip->tape_eop[partition]) + goto is_eop; + } else { + /* sequential filemarks not supported */ + mk_sense_invalid_fld(scp, SDEB_IN_CDB, 8, -1); + return check_condition_result; + } + devip->tape_location[partition] = pos; + return 0; + +is_fm: + devip->tape_location[partition] = pos; + mk_sense_info_tape(scp, NO_SENSE, NO_ADDITIONAL_SENSE, + FILEMARK_DETECTED_ASCQ, count - i, + SENSE_FLAG_FILEMARK); + return check_condition_result; + +is_eod: + devip->tape_location[partition] = pos; + mk_sense_info_tape(scp, BLANK_CHECK, NO_ADDITIONAL_SENSE, + EOD_DETECTED_ASCQ, count - i, + 0); + return check_condition_result; + +is_bop: + devip->tape_location[partition] = 0; + mk_sense_info_tape(scp, NO_SENSE, NO_ADDITIONAL_SENSE, + BEGINNING_OF_P_M_DETECTED_ASCQ, count - i, + SENSE_FLAG_EOM); + devip->tape_location[partition] = 0; + return check_condition_result; + +is_eop: + devip->tape_location[partition] = devip->tape_eop[partition] - 1; + mk_sense_info_tape(scp, MEDIUM_ERROR, NO_ADDITIONAL_SENSE, + EOP_EOM_DETECTED_ASCQ, (unsigned int)i, + SENSE_FLAG_EOM); + return check_condition_result; +} + +static int resp_rewind(struct scsi_cmnd *scp, + struct sdebug_dev_info *devip) +{ + devip->tape_location[devip->tape_partition] = 0; + + return 0; +} + +static int partition_tape(struct sdebug_dev_info *devip, int nbr_partitions, + int part_0_size, int part_1_size) +{ + int i; + + if (part_0_size + part_1_size > TAPE_UNITS) + return -1; + devip->tape_eop[0] = part_0_size; + devip->tape_blocks[0]->fl_size = TAPE_BLOCK_EOD_FLAG; + devip->tape_eop[1] = part_1_size; + devip->tape_blocks[1] = devip->tape_blocks[0] + + devip->tape_eop[0]; + devip->tape_blocks[1]->fl_size = TAPE_BLOCK_EOD_FLAG; + + for (i = 0 ; i < TAPE_MAX_PARTITIONS; i++) + devip->tape_location[i] = 0; + + devip->tape_nbr_partitions = nbr_partitions; + devip->tape_partition = 0; + + partition_pg[3] = nbr_partitions - 1; + put_unaligned_be16(devip->tape_eop[0], partition_pg + 8); + put_unaligned_be16(devip->tape_eop[1], partition_pg + 10); + + return nbr_partitions; +} + +static int resp_format_medium(struct scsi_cmnd *scp, + struct sdebug_dev_info *devip) +{ + int res = 0; + unsigned char *cmd = scp->cmnd; + + if (sdebug_ptype != TYPE_TAPE) { + mk_sense_invalid_fld(scp, SDEB_IN_CDB, 0, -1); + return check_condition_result; + } + if (cmd[2] > 2) { + mk_sense_invalid_fld(scp, SDEB_IN_DATA, 2, -1); + return check_condition_result; + } + if (cmd[2] != 0) { + if (devip->tape_pending_nbr_partitions > 0) { + res = partition_tape(devip, + devip->tape_pending_nbr_partitions, + devip->tape_pending_part_0_size, + devip->tape_pending_part_1_size); + } else + res = partition_tape(devip, devip->tape_nbr_partitions, + devip->tape_eop[0], devip->tape_eop[1]); + } else + res = partition_tape(devip, 1, TAPE_UNITS, 0); + if (res < 0) + return -EINVAL; + + devip->tape_pending_nbr_partitions = -1; + + return 0; +} + static inline bool sdebug_dev_is_zoned(struct sdebug_dev_info *devip) { return devip->nr_zones != 0; @@ -3871,6 +4364,98 @@ static int prot_verify_read(struct scsi_cmnd *scp, sector_t start_sec, return ret; } +static int resp_read_tape(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) +{ + u32 i, num, transfer, size; + u8 *cmd = scp->cmnd; + struct scsi_data_buffer *sdb = &scp->sdb; + int partition = devip->tape_partition; + u32 pos = devip->tape_location[partition]; + struct tape_block *blp; + bool fixed, sili; + + if (cmd[0] != READ_6) { /* Only Read(6) supported */ + mk_sense_invalid_opcode(scp); + return illegal_condition_result; + } + fixed = (cmd[1] & 0x1) != 0; + sili = (cmd[1] & 0x2) != 0; + if (fixed && sili) { + mk_sense_invalid_fld(scp, SDEB_IN_CDB, 1, 1); + return check_condition_result; + } + + transfer = get_unaligned_be24(cmd + 2); + if (fixed) { + num = transfer; + size = devip->tape_blksize; + } else { + if (transfer < TAPE_MIN_BLKSIZE || + transfer > TAPE_MAX_BLKSIZE) { + mk_sense_invalid_fld(scp, SDEB_IN_CDB, 2, -1); + return check_condition_result; + } + num = 1; + size = transfer; + } + + for (i = 0, blp = devip->tape_blocks[partition] + pos; + i < num && pos < devip->tape_eop[partition]; + i++, pos++, blp++) { + devip->tape_location[partition] = pos + 1; + if (IS_TAPE_BLOCK_FM(blp->fl_size)) { + mk_sense_info_tape(scp, NO_SENSE, NO_ADDITIONAL_SENSE, + FILEMARK_DETECTED_ASCQ, fixed ? num - i : size, + SENSE_FLAG_FILEMARK); + scsi_set_resid(scp, (num - i) * size); + return check_condition_result; + } + /* Assume no REW */ + if (IS_TAPE_BLOCK_EOD(blp->fl_size)) { + mk_sense_info_tape(scp, BLANK_CHECK, NO_ADDITIONAL_SENSE, + EOD_DETECTED_ASCQ, fixed ? num - i : size, + 0); + devip->tape_location[partition] = pos; + scsi_set_resid(scp, (num - i) * size); + return check_condition_result; + } + sg_zero_buffer(sdb->table.sgl, sdb->table.nents, + size, i * size); + sg_copy_buffer(sdb->table.sgl, sdb->table.nents, + &(blp->data), 4, i * size, false); + if (fixed) { + if (blp->fl_size != devip->tape_blksize) { + scsi_set_resid(scp, (num - i) * size); + mk_sense_info_tape(scp, NO_SENSE, NO_ADDITIONAL_SENSE, + 0, num - i, + SENSE_FLAG_ILI); + return check_condition_result; + } + } else { + if (blp->fl_size != size) { + if (blp->fl_size < size) + scsi_set_resid(scp, size - blp->fl_size); + if (!sili) { + mk_sense_info_tape(scp, NO_SENSE, NO_ADDITIONAL_SENSE, + 0, size - blp->fl_size, + SENSE_FLAG_ILI); + return check_condition_result; + } + } + } + } + if (pos >= devip->tape_eop[partition]) { + mk_sense_info_tape(scp, NO_SENSE, NO_ADDITIONAL_SENSE, + EOP_EOM_DETECTED_ASCQ, fixed ? num - i : size, + SENSE_FLAG_EOM); + devip->tape_location[partition] = pos - 1; + return check_condition_result; + } + devip->tape_location[partition] = pos; + + return 0; +} + static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) { bool check_prot; @@ -3882,6 +4467,9 @@ static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) u8 *cmd = scp->cmnd; bool meta_data_locked = false; + if (sdebug_ptype == TYPE_TAPE) + return resp_read_tape(scp, devip); + switch (cmd[0]) { case READ_16: ei_lba = 0; @@ -4178,6 +4766,67 @@ static void unmap_region(struct sdeb_store_info *sip, sector_t lba, } } +static int resp_write_tape(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) +{ + u32 i, num, transfer, size, written = 0; + u8 *cmd = scp->cmnd; + struct scsi_data_buffer *sdb = &scp->sdb; + int partition = devip->tape_partition; + int pos = devip->tape_location[partition]; + struct tape_block *blp; + bool fixed, ew; + + if (cmd[0] != WRITE_6) { /* Only Write(6) supported */ + mk_sense_invalid_opcode(scp); + return illegal_condition_result; + } + + fixed = (cmd[1] & 1) != 0; + transfer = get_unaligned_be24(cmd + 2); + if (fixed) { + num = transfer; + size = devip->tape_blksize; + } else { + if (transfer < TAPE_MIN_BLKSIZE || + transfer > TAPE_MAX_BLKSIZE) { + mk_sense_invalid_fld(scp, SDEB_IN_CDB, 2, -1); + return check_condition_result; + } + num = 1; + size = transfer; + } + + scsi_set_resid(scp, num * transfer); + for (i = 0, blp = devip->tape_blocks[partition] + pos, ew = false; + i < num && pos < devip->tape_eop[partition] - 1; i++, pos++, blp++) { + blp->fl_size = size; + sg_copy_buffer(sdb->table.sgl, sdb->table.nents, + &(blp->data), 4, i * size, true); + written += size; + scsi_set_resid(scp, num * transfer - written); + ew |= (pos == devip->tape_eop[partition] - TAPE_EW); + } + + devip->tape_location[partition] = pos; + blp->fl_size = TAPE_BLOCK_EOD_FLAG; + if (pos >= devip->tape_eop[partition] - 1) { + mk_sense_info_tape(scp, VOLUME_OVERFLOW, + NO_ADDITIONAL_SENSE, EOP_EOM_DETECTED_ASCQ, + fixed ? num - i : transfer, + SENSE_FLAG_EOM); + return check_condition_result; + } + if (ew) { /* early warning */ + mk_sense_info_tape(scp, NO_SENSE, + NO_ADDITIONAL_SENSE, EOP_EOM_DETECTED_ASCQ, + fixed ? num - i : transfer, + SENSE_FLAG_EOM); + return check_condition_result; + } + + return 0; +} + static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) { bool check_prot; @@ -4190,6 +4839,9 @@ static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) u8 *cmd = scp->cmnd; bool meta_data_locked = false; + if (sdebug_ptype == TYPE_TAPE) + return resp_write_tape(scp, devip); + switch (cmd[0]) { case WRITE_16: ei_lba = 0; @@ -4918,7 +5570,10 @@ static int resp_sync_cache(struct scsi_cmnd *scp, * a GOOD status otherwise. Model a disk with a big cache and yield * CONDITION MET. Actually tries to bring range in main memory into the * cache associated with the CPU(s). + * + * The pcode 0x34 is also used for READ POSITION by tape devices. */ +enum {SDEBUG_READ_POSITION_ARR_SZ = 20}; static int resp_pre_fetch(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) { @@ -4930,6 +5585,31 @@ static int resp_pre_fetch(struct scsi_cmnd *scp, struct sdeb_store_info *sip = devip2sip(devip, true); u8 *fsp = sip->storep; + if (sdebug_ptype == TYPE_TAPE) { + if (cmd[0] == PRE_FETCH) { /* READ POSITION (10) */ + int all_length; + unsigned char arr[20]; + unsigned int pos; + + all_length = get_unaligned_be16(cmd + 7); + if ((cmd[1] & 0xfe) != 0 || + all_length != 0) { /* only short form */ + mk_sense_invalid_fld(scp, SDEB_IN_CDB, + all_length ? 7 : 1, 0); + return check_condition_result; + } + memset(arr, 0, SDEBUG_READ_POSITION_ARR_SZ); + arr[1] = devip->tape_partition; + pos = devip->tape_location[devip->tape_partition]; + put_unaligned_be32(pos, arr + 4); + put_unaligned_be32(pos, arr + 8); + return fill_from_dev_buffer(scp, arr, + SDEBUG_READ_POSITION_ARR_SZ); + } + mk_sense_invalid_opcode(scp); + return check_condition_result; + } + if (cmd[0] == PRE_FETCH) { /* 10 byte cdb */ lba = get_unaligned_be32(cmd + 2); nblks = get_unaligned_be16(cmd + 7); @@ -5638,10 +6318,10 @@ static u32 get_tag(struct scsi_cmnd *cmnd) /* Queued (deferred) command completions converge here. */ static void sdebug_q_cmd_complete(struct sdebug_defer *sd_dp) { - struct sdebug_queued_cmd *sqcp = container_of(sd_dp, struct sdebug_queued_cmd, sd_dp); + struct sdebug_scsi_cmd *sdsc = container_of(sd_dp, + typeof(*sdsc), sd_dp); + struct scsi_cmnd *scp = (struct scsi_cmnd *)sdsc - 1; unsigned long flags; - struct scsi_cmnd *scp = sqcp->scmd; - struct sdebug_scsi_cmd *sdsc; bool aborted; if (sdebug_statistics) { @@ -5652,27 +6332,23 @@ static void sdebug_q_cmd_complete(struct sdebug_defer *sd_dp) if (!scp) { pr_err("scmd=NULL\n"); - goto out; + return; } - sdsc = scsi_cmd_priv(scp); spin_lock_irqsave(&sdsc->lock, flags); aborted = sd_dp->aborted; if (unlikely(aborted)) sd_dp->aborted = false; - ASSIGN_QUEUED_CMD(scp, NULL); spin_unlock_irqrestore(&sdsc->lock, flags); if (aborted) { pr_info("bypassing scsi_done() due to aborted cmd, kicking-off EH\n"); blk_abort_request(scsi_cmd_to_rq(scp)); - goto out; + return; } scsi_done(scp); /* callback to mid level */ -out: - sdebug_free_queued_cmd(sqcp); } /* When high resolution timer goes off this function is called. */ @@ -5835,6 +6511,10 @@ static struct sdebug_dev_info *sdebug_device_create( } else { devip->zoned = false; } + if (sdebug_ptype == TYPE_TAPE) { + devip->tape_density = TAPE_DEF_DENSITY; + devip->tape_blksize = TAPE_DEF_BLKSIZE; + } devip->create_ts = ktime_get_boottime(); atomic_set(&devip->stopped, (sdeb_tur_ms_to_ready > 0 ? 2 : 0)); spin_lock_init(&devip->list_lock); @@ -5905,6 +6585,21 @@ static int scsi_debug_sdev_configure(struct scsi_device *sdp, if (devip == NULL) return 1; /* no resources, will be marked offline */ } + if (sdebug_ptype == TYPE_TAPE) { + if (!devip->tape_blocks[0]) { + devip->tape_blocks[0] = + kcalloc(TAPE_UNITS, sizeof(struct tape_block), + GFP_KERNEL); + if (!devip->tape_blocks[0]) + return 1; + } + devip->tape_pending_nbr_partitions = -1; + if (partition_tape(devip, 1, TAPE_UNITS, 0) < 0) { + kfree(devip->tape_blocks[0]); + devip->tape_blocks[0] = NULL; + return 1; + } + } sdp->hostdata = devip; if (sdebug_no_uld) sdp->no_uld_attach = 1; @@ -5950,31 +6645,41 @@ static void scsi_debug_sdev_destroy(struct scsi_device *sdp) debugfs_remove(devip->debugfs_entry); + if (sdebug_ptype == TYPE_TAPE) { + kfree(devip->tape_blocks[0]); + devip->tape_blocks[0] = NULL; + } + /* make this slot available for re-use */ devip->used = false; sdp->hostdata = NULL; } -/* Returns true if we require the queued memory to be freed by the caller. */ -static bool stop_qc_helper(struct sdebug_defer *sd_dp, - enum sdeb_defer_type defer_t) +/* Returns true if cancelled or not running callback. */ +static bool scsi_debug_stop_cmnd(struct scsi_cmnd *cmnd) { + struct sdebug_scsi_cmd *sdsc = scsi_cmd_priv(cmnd); + struct sdebug_defer *sd_dp = &sdsc->sd_dp; + enum sdeb_defer_type defer_t = READ_ONCE(sd_dp->defer_t); + + lockdep_assert_held(&sdsc->lock); + if (defer_t == SDEB_DEFER_HRT) { int res = hrtimer_try_to_cancel(&sd_dp->hrt); switch (res) { - case 0: /* Not active, it must have already run */ case -1: /* -1 It's executing the CB */ return false; + case 0: /* Not active, it must have already run */ case 1: /* Was active, we've now cancelled */ default: return true; } } else if (defer_t == SDEB_DEFER_WQ) { /* Cancel if pending */ - if (cancel_work_sync(&sd_dp->ew.work)) + if (cancel_work(&sd_dp->ew.work)) return true; - /* Was not pending, so it must have run */ + /* callback may be running, so return false */ return false; } else if (defer_t == SDEB_DEFER_POLL) { return true; @@ -5983,28 +6688,6 @@ static bool stop_qc_helper(struct sdebug_defer *sd_dp, return false; } - -static bool scsi_debug_stop_cmnd(struct scsi_cmnd *cmnd) -{ - enum sdeb_defer_type l_defer_t; - struct sdebug_defer *sd_dp; - struct sdebug_scsi_cmd *sdsc = scsi_cmd_priv(cmnd); - struct sdebug_queued_cmd *sqcp = TO_QUEUED_CMD(cmnd); - - lockdep_assert_held(&sdsc->lock); - - if (!sqcp) - return false; - sd_dp = &sqcp->sd_dp; - l_defer_t = READ_ONCE(sd_dp->defer_t); - ASSIGN_QUEUED_CMD(cmnd, NULL); - - if (stop_qc_helper(sd_dp, l_defer_t)) - sdebug_free_queued_cmd(sqcp); - - return true; -} - /* * Called from scsi_debug_abort() only, which is for timed-out cmd. */ @@ -6076,7 +6759,7 @@ static int sdebug_fail_abort(struct scsi_cmnd *cmnd) static int scsi_debug_abort(struct scsi_cmnd *SCpnt) { - bool ok = scsi_debug_abort_cmnd(SCpnt); + bool aborted = scsi_debug_abort_cmnd(SCpnt); u8 *cmd = SCpnt->cmnd; u8 opcode = cmd[0]; @@ -6085,7 +6768,8 @@ static int scsi_debug_abort(struct scsi_cmnd *SCpnt) if (SDEBUG_OPT_ALL_NOISE & sdebug_opts) sdev_printk(KERN_INFO, SCpnt->device, "%s: command%s found\n", __func__, - ok ? "" : " not"); + aborted ? "" : " not"); + if (sdebug_fail_abort(SCpnt)) { scmd_printk(KERN_INFO, SCpnt, "fail abort command 0x%x\n", @@ -6093,6 +6777,9 @@ static int scsi_debug_abort(struct scsi_cmnd *SCpnt) return FAILED; } + if (aborted == false) + return FAILED; + return SUCCESS; } @@ -6144,6 +6831,22 @@ static int sdebug_fail_lun_reset(struct scsi_cmnd *cmnd) return 0; } +static void scsi_tape_reset_clear(struct sdebug_dev_info *devip) +{ + if (sdebug_ptype == TYPE_TAPE) { + int i; + + devip->tape_blksize = TAPE_DEF_BLKSIZE; + devip->tape_density = TAPE_DEF_DENSITY; + devip->tape_partition = 0; + devip->tape_dce = 0; + for (i = 0; i < TAPE_MAX_PARTITIONS; i++) + devip->tape_location[i] = 0; + devip->tape_pending_nbr_partitions = -1; + /* Don't reset partitioning? */ + } +} + static int scsi_debug_device_reset(struct scsi_cmnd *SCpnt) { struct scsi_device *sdp = SCpnt->device; @@ -6157,8 +6860,10 @@ static int scsi_debug_device_reset(struct scsi_cmnd *SCpnt) sdev_printk(KERN_INFO, sdp, "%s\n", __func__); scsi_debug_stop_all_queued(sdp); - if (devip) + if (devip) { set_bit(SDEBUG_UA_POR, devip->uas_bm); + scsi_tape_reset_clear(devip); + } if (sdebug_fail_lun_reset(SCpnt)) { scmd_printk(KERN_INFO, SCpnt, "fail lun reset 0x%x\n", opcode); @@ -6196,6 +6901,7 @@ static int scsi_debug_target_reset(struct scsi_cmnd *SCpnt) list_for_each_entry(devip, &sdbg_host->dev_info_list, dev_list) { if (devip->target == sdp->id) { set_bit(SDEBUG_UA_BUS_RESET, devip->uas_bm); + scsi_tape_reset_clear(devip); ++k; } } @@ -6227,6 +6933,7 @@ static int scsi_debug_bus_reset(struct scsi_cmnd *SCpnt) list_for_each_entry(devip, &sdbg_host->dev_info_list, dev_list) { set_bit(SDEBUG_UA_BUS_RESET, devip->uas_bm); + scsi_tape_reset_clear(devip); ++k; } @@ -6250,6 +6957,7 @@ static int scsi_debug_host_reset(struct scsi_cmnd *SCpnt) list_for_each_entry(devip, &sdbg_host->dev_info_list, dev_list) { set_bit(SDEBUG_UA_BUS_RESET, devip->uas_bm); + scsi_tape_reset_clear(devip); ++k; } } @@ -6366,33 +7074,6 @@ static bool inject_on_this_cmd(void) #define INCLUSIVE_TIMING_MAX_NS 1000000 /* 1 millisecond */ - -void sdebug_free_queued_cmd(struct sdebug_queued_cmd *sqcp) -{ - if (sqcp) - kmem_cache_free(queued_cmd_cache, sqcp); -} - -static struct sdebug_queued_cmd *sdebug_alloc_queued_cmd(struct scsi_cmnd *scmd) -{ - struct sdebug_queued_cmd *sqcp; - struct sdebug_defer *sd_dp; - - sqcp = kmem_cache_zalloc(queued_cmd_cache, GFP_ATOMIC); - if (!sqcp) - return NULL; - - sd_dp = &sqcp->sd_dp; - - hrtimer_init(&sd_dp->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); - sd_dp->hrt.function = sdebug_q_cmd_hrt_complete; - INIT_WORK(&sd_dp->ew.work, sdebug_q_cmd_wq_complete); - - sqcp->scmd = scmd; - - return sqcp; -} - /* Complete the processing of the thread that queued a SCSI command to this * driver. It either completes the command by calling cmnd_done() or * schedules a hr timer or work queue then returns 0. Returns @@ -6409,7 +7090,6 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, struct sdebug_scsi_cmd *sdsc = scsi_cmd_priv(cmnd); unsigned long flags; u64 ns_from_boot = 0; - struct sdebug_queued_cmd *sqcp; struct scsi_device *sdp; struct sdebug_defer *sd_dp; @@ -6441,12 +7121,7 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, } } - sqcp = sdebug_alloc_queued_cmd(cmnd); - if (!sqcp) { - pr_err("%s no alloc\n", __func__); - return SCSI_MLQUEUE_HOST_BUSY; - } - sd_dp = &sqcp->sd_dp; + sd_dp = &sdsc->sd_dp; if (polled || (ndelay > 0 && ndelay < INCLUSIVE_TIMING_MAX_NS)) ns_from_boot = ktime_get_boottime_ns(); @@ -6494,7 +7169,6 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, if (kt <= d) { /* elapsed duration >= kt */ /* call scsi_done() from this thread */ - sdebug_free_queued_cmd(sqcp); scsi_done(cmnd); return 0; } @@ -6507,13 +7181,11 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, if (polled) { spin_lock_irqsave(&sdsc->lock, flags); sd_dp->cmpl_ts = ktime_add(ns_to_ktime(ns_from_boot), kt); - ASSIGN_QUEUED_CMD(cmnd, sqcp); WRITE_ONCE(sd_dp->defer_t, SDEB_DEFER_POLL); spin_unlock_irqrestore(&sdsc->lock, flags); } else { /* schedule the invocation of scsi_done() for a later time */ spin_lock_irqsave(&sdsc->lock, flags); - ASSIGN_QUEUED_CMD(cmnd, sqcp); WRITE_ONCE(sd_dp->defer_t, SDEB_DEFER_HRT); hrtimer_start(&sd_dp->hrt, kt, HRTIMER_MODE_REL_PINNED); /* @@ -6537,13 +7209,11 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip, sd_dp->issuing_cpu = raw_smp_processor_id(); if (polled) { spin_lock_irqsave(&sdsc->lock, flags); - ASSIGN_QUEUED_CMD(cmnd, sqcp); sd_dp->cmpl_ts = ns_to_ktime(ns_from_boot); WRITE_ONCE(sd_dp->defer_t, SDEB_DEFER_POLL); spin_unlock_irqrestore(&sdsc->lock, flags); } else { spin_lock_irqsave(&sdsc->lock, flags); - ASSIGN_QUEUED_CMD(cmnd, sqcp); WRITE_ONCE(sd_dp->defer_t, SDEB_DEFER_WQ); schedule_work(&sd_dp->ew.work); spin_unlock_irqrestore(&sdsc->lock, flags); @@ -6835,7 +7505,7 @@ static int scsi_debug_show_info(struct seq_file *m, struct Scsi_Host *host) blk_mq_tagset_busy_iter(&host->tag_set, sdebug_submit_queue_iter, &data); if (f >= 0) { - seq_printf(m, " in_use_bm BUSY: %s: %d,%d\n", + seq_printf(m, " BUSY: %s: %d,%d\n", "first,last bits", f, l); } } @@ -7910,12 +8580,6 @@ static int __init scsi_debug_init(void) hosts_to_add = sdebug_add_host; sdebug_add_host = 0; - queued_cmd_cache = KMEM_CACHE(sdebug_queued_cmd, SLAB_HWCACHE_ALIGN); - if (!queued_cmd_cache) { - ret = -ENOMEM; - goto driver_unreg; - } - sdebug_debugfs_root = debugfs_create_dir("scsi_debug", NULL); if (IS_ERR_OR_NULL(sdebug_debugfs_root)) pr_info("%s: failed to create initial debugfs directory\n", __func__); @@ -7942,8 +8606,6 @@ static int __init scsi_debug_init(void) return 0; -driver_unreg: - driver_unregister(&sdebug_driverfs_driver); bus_unreg: bus_unregister(&pseudo_lld_bus); dev_unreg: @@ -7959,7 +8621,6 @@ static void __exit scsi_debug_exit(void) for (; k; k--) sdebug_do_remove_host(true); - kmem_cache_destroy(queued_cmd_cache); driver_unregister(&sdebug_driverfs_driver); bus_unregister(&pseudo_lld_bus); root_device_unregister(pseudo_primary); @@ -8343,7 +9004,6 @@ static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque) struct sdebug_defer *sd_dp; u32 unique_tag = blk_mq_unique_tag(rq); u16 hwq = blk_mq_unique_tag_to_hwq(unique_tag); - struct sdebug_queued_cmd *sqcp; unsigned long flags; int queue_num = data->queue_num; ktime_t time; @@ -8359,13 +9019,7 @@ static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque) time = ktime_get_boottime(); spin_lock_irqsave(&sdsc->lock, flags); - sqcp = TO_QUEUED_CMD(cmd); - if (!sqcp) { - spin_unlock_irqrestore(&sdsc->lock, flags); - return true; - } - - sd_dp = &sqcp->sd_dp; + sd_dp = &sdsc->sd_dp; if (READ_ONCE(sd_dp->defer_t) != SDEB_DEFER_POLL) { spin_unlock_irqrestore(&sdsc->lock, flags); return true; @@ -8375,8 +9029,6 @@ static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque) spin_unlock_irqrestore(&sdsc->lock, flags); return true; } - - ASSIGN_QUEUED_CMD(cmd, NULL); spin_unlock_irqrestore(&sdsc->lock, flags); if (sdebug_statistics) { @@ -8385,8 +9037,6 @@ static bool sdebug_blk_mq_poll_iter(struct request *rq, void *opaque) atomic_inc(&sdebug_miss_cpus); } - sdebug_free_queued_cmd(sqcp); - scsi_done(cmd); /* callback to mid level */ (*data->num_entries)++; return true; @@ -8701,8 +9351,12 @@ static int scsi_debug_queuecommand(struct Scsi_Host *shost, static int sdebug_init_cmd_priv(struct Scsi_Host *shost, struct scsi_cmnd *cmd) { struct sdebug_scsi_cmd *sdsc = scsi_cmd_priv(cmd); + struct sdebug_defer *sd_dp = &sdsc->sd_dp; spin_lock_init(&sdsc->lock); + hrtimer_init(&sd_dp->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); + sd_dp->hrt.function = sdebug_q_cmd_hrt_complete; + INIT_WORK(&sd_dp->ew.work, sdebug_q_cmd_wq_complete); return 0; } diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 815e7d63f3e2..ec60e8a96b11 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -547,6 +547,18 @@ enum scsi_disposition scsi_check_sense(struct scsi_cmnd *scmd) scsi_report_sense(sdev, &sshdr); + if (sshdr.sense_key == UNIT_ATTENTION) { + /* + * Increment the counters for Power on/Reset or New Media so + * that all ULDs interested in these can see that those have + * happened, even if someone else gets the sense data. + */ + if (sshdr.asc == 0x28) + scmd->device->ua_new_media_ctr++; + else if (sshdr.asc == 0x29) + scmd->device->ua_por_ctr++; + } + if (scsi_sense_is_deferred(&sshdr)) return NEEDS_RETRY; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index d776f13cd160..f1cfe0bb89b2 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -872,13 +872,18 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result) case 0x1a: /* start stop unit in progress */ case 0x1b: /* sanitize in progress */ case 0x1d: /* configuration in progress */ - case 0x24: /* depopulation in progress */ - case 0x25: /* depopulation restore in progress */ action = ACTION_DELAYED_RETRY; break; case 0x0a: /* ALUA state transition */ action = ACTION_DELAYED_REPREP; break; + /* + * Depopulation might take many hours, + * thus it is not worthwhile to retry. + */ + case 0x24: /* depopulation in progress */ + case 0x25: /* depopulation restore in progress */ + fallthrough; default: action = ACTION_FAIL; break; @@ -1664,13 +1669,6 @@ static blk_status_t scsi_prepare_cmd(struct request *req) if (in_flight) __set_bit(SCMD_STATE_INFLIGHT, &cmd->state); - /* - * Only clear the driver-private command data if the LLD does not supply - * a function to initialize that data. - */ - if (!shost->hostt->init_cmd_priv) - memset(cmd + 1, 0, shost->hostt->cmd_size); - cmd->prot_op = SCSI_PROT_NORMAL; if (blk_rq_bytes(req)) cmd->sc_data_direction = rq_dma_dir(req); @@ -1837,6 +1835,13 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, if (!scsi_host_queue_ready(q, shost, sdev, cmd)) goto out_dec_target_busy; + /* + * Only clear the driver-private command data if the LLD does not supply + * a function to initialize that data. + */ + if (shost->hostt->cmd_size && !shost->hostt->init_cmd_priv) + memset(cmd + 1, 0, shost->hostt->cmd_size); + if (!(req->rq_flags & RQF_DONTPREP)) { ret = scsi_prepare_cmd(req); if (ret != BLK_STS_OK) diff --git a/drivers/scsi/scsi_lib_test.c b/drivers/scsi/scsi_lib_test.c index 99834426a100..ae8af0e0047a 100644 --- a/drivers/scsi/scsi_lib_test.c +++ b/drivers/scsi/scsi_lib_test.c @@ -67,6 +67,13 @@ static void scsi_lib_test_multiple_sense(struct kunit *test) }; int i; + /* Success */ + sc.result = 0; + KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, &failures)); + KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, NULL)); + /* Command failed but caller did not pass in a failures array */ + scsi_build_sense(&sc, 0, ILLEGAL_REQUEST, 0x91, 0x36); + KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, NULL)); /* Match end of array */ scsi_build_sense(&sc, 0, ILLEGAL_REQUEST, 0x91, 0x36); KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc, &failures)); diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 087fcbfc9aaa..96d7e1a9a7c7 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -246,7 +246,7 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev, } ret = sbitmap_init_node(&sdev->budget_map, scsi_device_max_queue_depth(sdev), - new_shift, GFP_KERNEL, + new_shift, GFP_NOIO, sdev->request_queue->node, false, true); if (!ret) sbitmap_resize(&sdev->budget_map, depth); diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index ebbd50ec0cda..85867120c8a9 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -163,9 +163,11 @@ static const char *st_formats[] = { static int debugging = DEBUG; +/* Setting these non-zero may risk recognizing resets */ #define MAX_RETRIES 0 #define MAX_WRITE_RETRIES 0 #define MAX_READY_RETRIES 0 + #define NO_TAPE NOT_READY #define ST_TIMEOUT (900 * HZ) @@ -357,10 +359,18 @@ static int st_chk_result(struct scsi_tape *STp, struct st_request * SRpnt) { int result = SRpnt->result; u8 scode; + unsigned int ctr; DEB(const char *stp;) char *name = STp->name; struct st_cmdstatus *cmdstatp; + ctr = scsi_get_ua_por_ctr(STp->device); + if (ctr != STp->por_ctr) { + STp->por_ctr = ctr; + STp->pos_unknown = 1; /* ASC => power on / reset */ + st_printk(KERN_WARNING, STp, "Power on/reset recognized."); + } + if (!result) return 0; @@ -413,10 +423,11 @@ static int st_chk_result(struct scsi_tape *STp, struct st_request * SRpnt) if (cmdstatp->have_sense && cmdstatp->sense_hdr.asc == 0 && cmdstatp->sense_hdr.ascq == 0x17) STp->cleaning_req = 1; /* ASC and ASCQ => cleaning requested */ - if (cmdstatp->have_sense && scode == UNIT_ATTENTION && cmdstatp->sense_hdr.asc == 0x29) + if (cmdstatp->have_sense && scode == UNIT_ATTENTION && + cmdstatp->sense_hdr.asc == 0x29 && !STp->pos_unknown) { STp->pos_unknown = 1; /* ASC => power on / reset */ - - STp->pos_unknown |= STp->device->was_reset; + st_printk(KERN_WARNING, STp, "Power on/reset recognized."); + } if (cmdstatp->have_sense && scode == RECOVERED_ERROR @@ -952,7 +963,6 @@ static void reset_state(struct scsi_tape *STp) STp->partition = find_partition(STp); if (STp->partition < 0) STp->partition = 0; - STp->new_partition = STp->partition; } } @@ -969,6 +979,7 @@ static int test_ready(struct scsi_tape *STp, int do_wait) { int attentions, waits, max_wait, scode; int retval = CHKRES_READY, new_session = 0; + unsigned int ctr; unsigned char cmd[MAX_COMMAND_SIZE]; struct st_request *SRpnt = NULL; struct st_cmdstatus *cmdstatp = &STp->buffer->cmdstat; @@ -1025,6 +1036,13 @@ static int test_ready(struct scsi_tape *STp, int do_wait) } } + ctr = scsi_get_ua_new_media_ctr(STp->device); + if (ctr != STp->new_media_ctr) { + STp->new_media_ctr = ctr; + new_session = 1; + DEBC_printk(STp, "New tape session."); + } + retval = (STp->buffer)->syscall_result; if (!retval) retval = new_session ? CHKRES_NEW_SESSION : CHKRES_READY; @@ -2930,14 +2948,17 @@ static int st_int_ioctl(struct scsi_tape *STp, unsigned int cmd_in, unsigned lon if (cmd_in == MTSETDENSITY) { (STp->buffer)->b_data[4] = arg; STp->density_changed = 1; /* At least we tried ;-) */ + STp->changed_density = arg; } else if (cmd_in == SET_DENS_AND_BLK) (STp->buffer)->b_data[4] = arg >> 24; else (STp->buffer)->b_data[4] = STp->density; if (cmd_in == MTSETBLK || cmd_in == SET_DENS_AND_BLK) { ltmp = arg & MT_ST_BLKSIZE_MASK; - if (cmd_in == MTSETBLK) + if (cmd_in == MTSETBLK) { STp->blksize_changed = 1; /* At least we tried ;-) */ + STp->changed_blksize = arg; + } } else ltmp = STp->block_size; (STp->buffer)->b_data[9] = (ltmp >> 16); @@ -3636,9 +3657,23 @@ static long st_ioctl(struct file *file, unsigned int cmd_in, unsigned long arg) retval = (-EIO); goto out; } - reset_state(STp); - /* remove this when the midlevel properly clears was_reset */ - STp->device->was_reset = 0; + reset_state(STp); /* Clears pos_unknown */ + + /* Fix the device settings after reset, ignore errors */ + if (mtc.mt_op == MTREW || mtc.mt_op == MTSEEK || + mtc.mt_op == MTEOM) { + if (STp->can_partitions) { + /* STp->new_partition contains the + * latest partition set + */ + STp->partition = 0; + switch_partition(STp); + } + if (STp->density_changed) + st_int_ioctl(STp, MTSETDENSITY, STp->changed_density); + if (STp->blksize_changed) + st_int_ioctl(STp, MTSETBLK, STp->changed_blksize); + } } if (mtc.mt_op != MTNOP && mtc.mt_op != MTSETBLK && @@ -4384,6 +4419,9 @@ static int st_probe(struct device *dev) goto out_idr_remove; } + tpnt->new_media_ctr = scsi_get_ua_new_media_ctr(SDp); + tpnt->por_ctr = scsi_get_ua_por_ctr(SDp); + dev_set_drvdata(dev, tpnt); @@ -4665,6 +4703,24 @@ options_show(struct device *dev, struct device_attribute *attr, char *buf) } static DEVICE_ATTR_RO(options); +/** + * position_lost_in_reset_show - Value 1 indicates that reads, writes, etc. + * are blocked because a device reset has occurred and no operation positioning + * the tape has been issued. + * @dev: struct device + * @attr: attribute structure + * @buf: buffer to return formatted data in + */ +static ssize_t position_lost_in_reset_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct st_modedef *STm = dev_get_drvdata(dev); + struct scsi_tape *STp = STm->tape; + + return sprintf(buf, "%d", STp->pos_unknown); +} +static DEVICE_ATTR_RO(position_lost_in_reset); + /* Support for tape stats */ /** @@ -4849,6 +4905,7 @@ static struct attribute *st_dev_attrs[] = { &dev_attr_default_density.attr, &dev_attr_default_compression.attr, &dev_attr_options.attr, + &dev_attr_position_lost_in_reset.attr, NULL, }; diff --git a/drivers/scsi/st.h b/drivers/scsi/st.h index 1aaaf5369a40..0d7c4b8c2c8a 100644 --- a/drivers/scsi/st.h +++ b/drivers/scsi/st.h @@ -165,6 +165,7 @@ struct scsi_tape { unsigned char compression_changed; unsigned char drv_buffer; unsigned char density; + unsigned char changed_density; unsigned char door_locked; unsigned char autorew_dev; /* auto-rewind device */ unsigned char rew_at_close; /* rewind necessary at close */ @@ -172,11 +173,16 @@ struct scsi_tape { unsigned char cleaning_req; /* cleaning requested? */ unsigned char first_tur; /* first TEST UNIT READY */ int block_size; + int changed_blksize; int min_block; int max_block; int recover_count; /* From tape opening */ int recover_reg; /* From last status call */ + /* The saved values of midlevel counters */ + unsigned int new_media_ctr; + unsigned int por_ctr; + #if DEBUG unsigned char write_pending; int nbr_finished; diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 5a101ac06c47..a8614e54544e 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1800,6 +1800,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) length = scsi_bufflen(scmnd); payload = (struct vmbus_packet_mpb_array *)&cmd_request->mpb; + payload->range.len = 0; payload_sz = 0; if (scsi_sg_count(scmnd)) { diff --git a/drivers/soc/qcom/smp2p.c b/drivers/soc/qcom/smp2p.c index 4783ab1adb8d..a3e88ced328a 100644 --- a/drivers/soc/qcom/smp2p.c +++ b/drivers/soc/qcom/smp2p.c @@ -365,7 +365,7 @@ static void smp2p_irq_print_chip(struct irq_data *irqd, struct seq_file *p) { struct smp2p_entry *entry = irq_data_get_irq_chip_data(irqd); - seq_printf(p, " %8s", dev_name(entry->smp2p->dev)); + seq_printf(p, "%8s", dev_name(entry->smp2p->dev)); } static struct irq_chip smp2p_irq_chip = { diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 16e3ded98c32..832588f21f91 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -212,7 +212,7 @@ int iscsi_target_check_login_request( if ((login_req->max_version != login->version_max) || (login_req->min_version != login->version_min)) { - pr_err("Login request changed Version Max/Nin" + pr_err("Login request changed Version Max/Min" " unexpectedly to 0x%02x/0x%02x, protocol error\n", login_req->max_version, login_req->min_version); iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, @@ -557,7 +557,7 @@ static void iscsi_target_do_login_rx(struct work_struct *work) * before initial PDU processing in iscsi_target_start_negotiation() * has completed, go ahead and retry until it's cleared. * - * Otherwise if the TCP connection drops while this is occuring, + * Otherwise if the TCP connection drops while this is occurring, * iscsi_target_start_negotiation() will detect the failure, call * cancel_delayed_work_sync(&conn->login_work), and cleanup the * remaining iscsi connection resources from iscsi_np process context. @@ -1050,7 +1050,7 @@ static int iscsi_target_do_login(struct iscsit_conn *conn, struct iscsi_login *l /* * Check to make sure the TCP connection has not * dropped asynchronously while session reinstatement - * was occuring in this kthread context, before + * was occurring in this kthread context, before * transitioning to full feature phase operation. */ if (iscsi_target_sk_check_close(conn)) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index d1ae3df069a4..cc2da086f96e 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -1078,8 +1078,8 @@ passthrough_parse_cdb(struct se_cmd *cmd, if (!dev->dev_attrib.emulate_pr && ((cdb[0] == PERSISTENT_RESERVE_IN) || (cdb[0] == PERSISTENT_RESERVE_OUT) || - (cdb[0] == RELEASE || cdb[0] == RELEASE_10) || - (cdb[0] == RESERVE || cdb[0] == RESERVE_10))) { + (cdb[0] == RELEASE_6 || cdb[0] == RELEASE_10) || + (cdb[0] == RESERVE_6 || cdb[0] == RESERVE_10))) { return TCM_UNSUPPORTED_SCSI_OPCODE; } @@ -1101,7 +1101,7 @@ passthrough_parse_cdb(struct se_cmd *cmd, return target_cmd_size_check(cmd, size); } - if (cdb[0] == RELEASE || cdb[0] == RELEASE_10) { + if (cdb[0] == RELEASE_6 || cdb[0] == RELEASE_10) { cmd->execute_cmd = target_scsi2_reservation_release; if (cdb[0] == RELEASE_10) size = get_unaligned_be16(&cdb[7]); @@ -1109,7 +1109,7 @@ passthrough_parse_cdb(struct se_cmd *cmd, size = cmd->data_length; return target_cmd_size_check(cmd, size); } - if (cdb[0] == RESERVE || cdb[0] == RESERVE_10) { + if (cdb[0] == RESERVE_6 || cdb[0] == RESERVE_10) { cmd->execute_cmd = target_scsi2_reservation_reserve; if (cdb[0] == RESERVE_10) size = get_unaligned_be16(&cdb[7]); diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 4f4ad6af416c..34cf2c399b39 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -91,7 +91,7 @@ target_scsi2_reservation_check(struct se_cmd *cmd) switch (cmd->t_task_cdb[0]) { case INQUIRY: - case RELEASE: + case RELEASE_6: case RELEASE_10: return 0; default: @@ -418,12 +418,12 @@ static int core_scsi3_pr_seq_non_holder(struct se_cmd *cmd, u32 pr_reg_type, return -EINVAL; } break; - case RELEASE: + case RELEASE_6: case RELEASE_10: /* Handled by CRH=1 in target_scsi2_reservation_release() */ ret = 0; break; - case RESERVE: + case RESERVE_6: case RESERVE_10: /* Handled by CRH=1 in target_scsi2_reservation_reserve() */ ret = 0; diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index ea14a3835681..0a02492bef70 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -1674,9 +1674,9 @@ static bool tcm_is_pr_enabled(struct target_opcode_descriptor *descr, return true; switch (descr->opcode) { - case RESERVE: + case RESERVE_6: case RESERVE_10: - case RELEASE: + case RELEASE_6: case RELEASE_10: /* * The pr_ops which are used by the backend modules don't @@ -1828,9 +1828,9 @@ static struct target_opcode_descriptor tcm_opcode_pro_register_move = { static struct target_opcode_descriptor tcm_opcode_release = { .support = SCSI_SUPPORT_FULL, - .opcode = RELEASE, + .opcode = RELEASE_6, .cdb_size = 6, - .usage_bits = {RELEASE, 0x00, 0x00, 0x00, + .usage_bits = {RELEASE_6, 0x00, 0x00, 0x00, 0x00, SCSI_CONTROL_MASK}, .enabled = tcm_is_pr_enabled, }; @@ -1847,9 +1847,9 @@ static struct target_opcode_descriptor tcm_opcode_release10 = { static struct target_opcode_descriptor tcm_opcode_reserve = { .support = SCSI_SUPPORT_FULL, - .opcode = RESERVE, + .opcode = RESERVE_6, .cdb_size = 6, - .usage_bits = {RESERVE, 0x00, 0x00, 0x00, + .usage_bits = {RESERVE_6, 0x00, 0x00, 0x00, 0x00, SCSI_CONTROL_MASK}, .enabled = tcm_is_pr_enabled, }; @@ -2151,8 +2151,10 @@ spc_rsoc_get_descr(struct se_cmd *cmd, struct target_opcode_descriptor **opcode) if (descr->serv_action_valid) return TCM_INVALID_CDB_FIELD; - if (!descr->enabled || descr->enabled(descr, cmd)) + if (!descr->enabled || descr->enabled(descr, cmd)) { *opcode = descr; + return TCM_NO_SENSE; + } break; case 0x2: /* @@ -2166,8 +2168,10 @@ spc_rsoc_get_descr(struct se_cmd *cmd, struct target_opcode_descriptor **opcode) if (descr->serv_action_valid && descr->service_action == requested_sa) { if (!descr->enabled || descr->enabled(descr, - cmd)) + cmd)) { *opcode = descr; + return TCM_NO_SENSE; + } } else if (!descr->serv_action_valid) return TCM_INVALID_CDB_FIELD; break; @@ -2180,13 +2184,15 @@ spc_rsoc_get_descr(struct se_cmd *cmd, struct target_opcode_descriptor **opcode) */ if (descr->service_action == requested_sa) if (!descr->enabled || descr->enabled(descr, - cmd)) + cmd)) { *opcode = descr; + return TCM_NO_SENSE; + } break; } } - return 0; + return TCM_NO_SENSE; } static sense_reason_t @@ -2243,7 +2249,7 @@ spc_emulate_report_supp_op_codes(struct se_cmd *cmd) response_length += spc_rsoc_encode_command_descriptor( &buf[response_length], rctd, descr); } - put_unaligned_be32(response_length - 3, buf); + put_unaligned_be32(response_length - 4, buf); } else { response_length = spc_rsoc_encode_one_command_descriptor( &buf[response_length], rctd, descr, @@ -2267,9 +2273,9 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size) unsigned char *cdb = cmd->t_task_cdb; switch (cdb[0]) { - case RESERVE: + case RESERVE_6: case RESERVE_10: - case RELEASE: + case RELEASE_6: case RELEASE_10: if (!dev->dev_attrib.emulate_pr) return TCM_UNSUPPORTED_SCSI_OPCODE; @@ -2313,7 +2319,7 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size) *size = get_unaligned_be32(&cdb[5]); cmd->execute_cmd = target_scsi3_emulate_pr_out; break; - case RELEASE: + case RELEASE_6: case RELEASE_10: if (cdb[0] == RELEASE_10) *size = get_unaligned_be16(&cdb[7]); @@ -2322,7 +2328,7 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size) cmd->execute_cmd = target_scsi2_reservation_release; break; - case RESERVE: + case RESERVE_6: case RESERVE_10: /* * The SPC-2 RESERVE does not contain a size in the SCSI CDB. diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c index c42cbde8a31b..210648a0092e 100644 --- a/drivers/target/target_core_stat.c +++ b/drivers/target/target_core_stat.c @@ -117,9 +117,9 @@ static ssize_t target_stat_tgt_status_show(struct config_item *item, char *page) { if (to_stat_tgt_dev(item)->export_count) - return snprintf(page, PAGE_SIZE, "activated"); + return snprintf(page, PAGE_SIZE, "activated\n"); else - return snprintf(page, PAGE_SIZE, "deactivated"); + return snprintf(page, PAGE_SIZE, "deactivated\n"); } static ssize_t target_stat_tgt_non_access_lus_show(struct config_item *item, diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index df08f13052ff..8bb1a01fef2a 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -798,7 +798,7 @@ static int ptmx_open(struct inode *inode, struct file *filp) nonseekable_open(inode, filp); /* We refuse fsnotify events on ptmx, since it's a shared resource */ - filp->f_mode |= FMODE_NONOTIFY; + file_set_fsnotify_mode(filp, FMODE_NONOTIFY); retval = tty_alloc_file(filp); if (retval) diff --git a/drivers/ufs/core/ufs-sysfs.c b/drivers/ufs/core/ufs-sysfs.c index 3438269a5440..90b5ab60f5ae 100644 --- a/drivers/ufs/core/ufs-sysfs.c +++ b/drivers/ufs/core/ufs-sysfs.c @@ -458,6 +458,14 @@ static ssize_t pm_qos_enable_store(struct device *dev, return count; } +static ssize_t critical_health_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%d\n", hba->critical_health_count); +} + static DEVICE_ATTR_RW(rpm_lvl); static DEVICE_ATTR_RO(rpm_target_dev_state); static DEVICE_ATTR_RO(rpm_target_link_state); @@ -470,6 +478,7 @@ static DEVICE_ATTR_RW(enable_wb_buf_flush); static DEVICE_ATTR_RW(wb_flush_threshold); static DEVICE_ATTR_RW(rtc_update_ms); static DEVICE_ATTR_RW(pm_qos_enable); +static DEVICE_ATTR_RO(critical_health); static struct attribute *ufs_sysfs_ufshcd_attrs[] = { &dev_attr_rpm_lvl.attr, @@ -484,6 +493,7 @@ static struct attribute *ufs_sysfs_ufshcd_attrs[] = { &dev_attr_wb_flush_threshold.attr, &dev_attr_rtc_update_ms.attr, &dev_attr_pm_qos_enable.attr, + &dev_attr_critical_health.attr, NULL }; diff --git a/drivers/ufs/core/ufs_bsg.c b/drivers/ufs/core/ufs_bsg.c index 8d4ad0a3f2cf..252186124669 100644 --- a/drivers/ufs/core/ufs_bsg.c +++ b/drivers/ufs/core/ufs_bsg.c @@ -194,10 +194,12 @@ static int ufs_bsg_request(struct bsg_job *job) ufshcd_rpm_put_sync(hba); kfree(buff); bsg_reply->result = ret; - job->reply_len = !rpmb ? sizeof(struct ufs_bsg_reply) : sizeof(struct ufs_rpmb_reply); /* complete the job here only if no error */ - if (ret == 0) + if (ret == 0) { + job->reply_len = rpmb ? sizeof(struct ufs_rpmb_reply) : + sizeof(struct ufs_bsg_reply); bsg_job_done(job, ret, bsg_reply->reply_payload_rcv_len); + } return ret; } diff --git a/drivers/ufs/core/ufs_trace.h b/drivers/ufs/core/ufs_trace.h index 84deca2b841d..caa32e23ffa5 100644 --- a/drivers/ufs/core/ufs_trace.h +++ b/drivers/ufs/core/ufs_trace.h @@ -83,34 +83,34 @@ UFS_CMD_TRACE_TSF_TYPES TRACE_EVENT(ufshcd_clk_gating, - TP_PROTO(const char *dev_name, int state), + TP_PROTO(struct ufs_hba *hba, int state), - TP_ARGS(dev_name, state), + TP_ARGS(hba, state), TP_STRUCT__entry( - __string(dev_name, dev_name) + __field(struct ufs_hba *, hba) __field(int, state) ), TP_fast_assign( - __assign_str(dev_name); + __entry->hba = hba; __entry->state = state; ), TP_printk("%s: gating state changed to %s", - __get_str(dev_name), + dev_name(__entry->hba->dev), __print_symbolic(__entry->state, UFSCHD_CLK_GATING_STATES)) ); TRACE_EVENT(ufshcd_clk_scaling, - TP_PROTO(const char *dev_name, const char *state, const char *clk, + TP_PROTO(struct ufs_hba *hba, const char *state, const char *clk, u32 prev_state, u32 curr_state), - TP_ARGS(dev_name, state, clk, prev_state, curr_state), + TP_ARGS(hba, state, clk, prev_state, curr_state), TP_STRUCT__entry( - __string(dev_name, dev_name) + __field(struct ufs_hba *, hba) __string(state, state) __string(clk, clk) __field(u32, prev_state) @@ -118,7 +118,7 @@ TRACE_EVENT(ufshcd_clk_scaling, ), TP_fast_assign( - __assign_str(dev_name); + __entry->hba = hba; __assign_str(state); __assign_str(clk); __entry->prev_state = prev_state; @@ -126,80 +126,80 @@ TRACE_EVENT(ufshcd_clk_scaling, ), TP_printk("%s: %s %s from %u to %u Hz", - __get_str(dev_name), __get_str(state), __get_str(clk), + dev_name(__entry->hba->dev), __get_str(state), __get_str(clk), __entry->prev_state, __entry->curr_state) ); TRACE_EVENT(ufshcd_auto_bkops_state, - TP_PROTO(const char *dev_name, const char *state), + TP_PROTO(struct ufs_hba *hba, const char *state), - TP_ARGS(dev_name, state), + TP_ARGS(hba, state), TP_STRUCT__entry( - __string(dev_name, dev_name) + __field(struct ufs_hba *, hba) __string(state, state) ), TP_fast_assign( - __assign_str(dev_name); + __entry->hba = hba; __assign_str(state); ), TP_printk("%s: auto bkops - %s", - __get_str(dev_name), __get_str(state)) + dev_name(__entry->hba->dev), __get_str(state)) ); DECLARE_EVENT_CLASS(ufshcd_profiling_template, - TP_PROTO(const char *dev_name, const char *profile_info, s64 time_us, + TP_PROTO(struct ufs_hba *hba, const char *profile_info, s64 time_us, int err), - TP_ARGS(dev_name, profile_info, time_us, err), + TP_ARGS(hba, profile_info, time_us, err), TP_STRUCT__entry( - __string(dev_name, dev_name) + __field(struct ufs_hba *, hba) __string(profile_info, profile_info) __field(s64, time_us) __field(int, err) ), TP_fast_assign( - __assign_str(dev_name); + __entry->hba = hba; __assign_str(profile_info); __entry->time_us = time_us; __entry->err = err; ), TP_printk("%s: %s: took %lld usecs, err %d", - __get_str(dev_name), __get_str(profile_info), + dev_name(__entry->hba->dev), __get_str(profile_info), __entry->time_us, __entry->err) ); DEFINE_EVENT(ufshcd_profiling_template, ufshcd_profile_hibern8, - TP_PROTO(const char *dev_name, const char *profile_info, s64 time_us, + TP_PROTO(struct ufs_hba *hba, const char *profile_info, s64 time_us, int err), - TP_ARGS(dev_name, profile_info, time_us, err)); + TP_ARGS(hba, profile_info, time_us, err)); DEFINE_EVENT(ufshcd_profiling_template, ufshcd_profile_clk_gating, - TP_PROTO(const char *dev_name, const char *profile_info, s64 time_us, + TP_PROTO(struct ufs_hba *hba, const char *profile_info, s64 time_us, int err), - TP_ARGS(dev_name, profile_info, time_us, err)); + TP_ARGS(hba, profile_info, time_us, err)); DEFINE_EVENT(ufshcd_profiling_template, ufshcd_profile_clk_scaling, - TP_PROTO(const char *dev_name, const char *profile_info, s64 time_us, + TP_PROTO(struct ufs_hba *hba, const char *profile_info, s64 time_us, int err), - TP_ARGS(dev_name, profile_info, time_us, err)); + TP_ARGS(hba, profile_info, time_us, err)); DECLARE_EVENT_CLASS(ufshcd_template, - TP_PROTO(const char *dev_name, int err, s64 usecs, + TP_PROTO(struct ufs_hba *hba, int err, s64 usecs, int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state), + TP_ARGS(hba, err, usecs, dev_state, link_state), TP_STRUCT__entry( __field(s64, usecs) __field(int, err) - __string(dev_name, dev_name) + __field(struct ufs_hba *, hba) __field(int, dev_state) __field(int, link_state) ), @@ -207,14 +207,14 @@ DECLARE_EVENT_CLASS(ufshcd_template, TP_fast_assign( __entry->usecs = usecs; __entry->err = err; - __assign_str(dev_name); + __entry->hba = hba; __entry->dev_state = dev_state; __entry->link_state = link_state; ), TP_printk( "%s: took %lld usecs, dev_state: %s, link_state: %s, err %d", - __get_str(dev_name), + dev_name(__entry->hba->dev), __entry->usecs, __print_symbolic(__entry->dev_state, UFS_PWR_MODES), __print_symbolic(__entry->link_state, UFS_LINK_STATES), @@ -223,60 +223,62 @@ DECLARE_EVENT_CLASS(ufshcd_template, ); DEFINE_EVENT(ufshcd_template, ufshcd_system_suspend, - TP_PROTO(const char *dev_name, int err, s64 usecs, + TP_PROTO(struct ufs_hba *hba, int err, s64 usecs, int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); + TP_ARGS(hba, err, usecs, dev_state, link_state)); DEFINE_EVENT(ufshcd_template, ufshcd_system_resume, - TP_PROTO(const char *dev_name, int err, s64 usecs, + TP_PROTO(struct ufs_hba *hba, int err, s64 usecs, int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); + TP_ARGS(hba, err, usecs, dev_state, link_state)); DEFINE_EVENT(ufshcd_template, ufshcd_runtime_suspend, - TP_PROTO(const char *dev_name, int err, s64 usecs, + TP_PROTO(struct ufs_hba *hba, int err, s64 usecs, int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); + TP_ARGS(hba, err, usecs, dev_state, link_state)); DEFINE_EVENT(ufshcd_template, ufshcd_runtime_resume, - TP_PROTO(const char *dev_name, int err, s64 usecs, + TP_PROTO(struct ufs_hba *hba, int err, s64 usecs, int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); + TP_ARGS(hba, err, usecs, dev_state, link_state)); DEFINE_EVENT(ufshcd_template, ufshcd_init, - TP_PROTO(const char *dev_name, int err, s64 usecs, + TP_PROTO(struct ufs_hba *hba, int err, s64 usecs, int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); + TP_ARGS(hba, err, usecs, dev_state, link_state)); DEFINE_EVENT(ufshcd_template, ufshcd_wl_suspend, - TP_PROTO(const char *dev_name, int err, s64 usecs, + TP_PROTO(struct ufs_hba *hba, int err, s64 usecs, int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); + TP_ARGS(hba, err, usecs, dev_state, link_state)); DEFINE_EVENT(ufshcd_template, ufshcd_wl_resume, - TP_PROTO(const char *dev_name, int err, s64 usecs, + TP_PROTO(struct ufs_hba *hba, int err, s64 usecs, int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); + TP_ARGS(hba, err, usecs, dev_state, link_state)); DEFINE_EVENT(ufshcd_template, ufshcd_wl_runtime_suspend, - TP_PROTO(const char *dev_name, int err, s64 usecs, + TP_PROTO(struct ufs_hba *hba, int err, s64 usecs, int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); + TP_ARGS(hba, err, usecs, dev_state, link_state)); DEFINE_EVENT(ufshcd_template, ufshcd_wl_runtime_resume, - TP_PROTO(const char *dev_name, int err, s64 usecs, + TP_PROTO(struct ufs_hba *hba, int err, s64 usecs, int dev_state, int link_state), - TP_ARGS(dev_name, err, usecs, dev_state, link_state)); + TP_ARGS(hba, err, usecs, dev_state, link_state)); TRACE_EVENT(ufshcd_command, - TP_PROTO(struct scsi_device *sdev, enum ufs_trace_str_t str_t, + TP_PROTO(struct scsi_device *sdev, struct ufs_hba *hba, + enum ufs_trace_str_t str_t, unsigned int tag, u32 doorbell, u32 hwq_id, int transfer_len, u32 intr, u64 lba, u8 opcode, u8 group_id), - TP_ARGS(sdev, str_t, tag, doorbell, hwq_id, transfer_len, intr, lba, + TP_ARGS(sdev, hba, str_t, tag, doorbell, hwq_id, transfer_len, intr, lba, opcode, group_id), TP_STRUCT__entry( __field(struct scsi_device *, sdev) + __field(struct ufs_hba *, hba) __field(enum ufs_trace_str_t, str_t) __field(unsigned int, tag) __field(u32, doorbell) @@ -290,6 +292,7 @@ TRACE_EVENT(ufshcd_command, TP_fast_assign( __entry->sdev = sdev; + __entry->hba = hba; __entry->str_t = str_t; __entry->tag = tag; __entry->doorbell = doorbell; @@ -312,13 +315,13 @@ TRACE_EVENT(ufshcd_command, ); TRACE_EVENT(ufshcd_uic_command, - TP_PROTO(const char *dev_name, enum ufs_trace_str_t str_t, u32 cmd, + TP_PROTO(struct ufs_hba *hba, enum ufs_trace_str_t str_t, u32 cmd, u32 arg1, u32 arg2, u32 arg3), - TP_ARGS(dev_name, str_t, cmd, arg1, arg2, arg3), + TP_ARGS(hba, str_t, cmd, arg1, arg2, arg3), TP_STRUCT__entry( - __string(dev_name, dev_name) + __field(struct ufs_hba *, hba) __field(enum ufs_trace_str_t, str_t) __field(u32, cmd) __field(u32, arg1) @@ -327,7 +330,7 @@ TRACE_EVENT(ufshcd_uic_command, ), TP_fast_assign( - __assign_str(dev_name); + __entry->hba = hba; __entry->str_t = str_t; __entry->cmd = cmd; __entry->arg1 = arg1; @@ -337,19 +340,19 @@ TRACE_EVENT(ufshcd_uic_command, TP_printk( "%s: %s: cmd: 0x%x, arg1: 0x%x, arg2: 0x%x, arg3: 0x%x", - show_ufs_cmd_trace_str(__entry->str_t), __get_str(dev_name), + show_ufs_cmd_trace_str(__entry->str_t), dev_name(__entry->hba->dev), __entry->cmd, __entry->arg1, __entry->arg2, __entry->arg3 ) ); TRACE_EVENT(ufshcd_upiu, - TP_PROTO(const char *dev_name, enum ufs_trace_str_t str_t, void *hdr, + TP_PROTO(struct ufs_hba *hba, enum ufs_trace_str_t str_t, void *hdr, void *tsf, enum ufs_trace_tsf_t tsf_t), - TP_ARGS(dev_name, str_t, hdr, tsf, tsf_t), + TP_ARGS(hba, str_t, hdr, tsf, tsf_t), TP_STRUCT__entry( - __string(dev_name, dev_name) + __field(struct ufs_hba *, hba) __field(enum ufs_trace_str_t, str_t) __array(unsigned char, hdr, 12) __array(unsigned char, tsf, 16) @@ -357,7 +360,7 @@ TRACE_EVENT(ufshcd_upiu, ), TP_fast_assign( - __assign_str(dev_name); + __entry->hba = hba; __entry->str_t = str_t; memcpy(__entry->hdr, hdr, sizeof(__entry->hdr)); memcpy(__entry->tsf, tsf, sizeof(__entry->tsf)); @@ -366,7 +369,7 @@ TRACE_EVENT(ufshcd_upiu, TP_printk( "%s: %s: HDR:%s, %s:%s", - show_ufs_cmd_trace_str(__entry->str_t), __get_str(dev_name), + show_ufs_cmd_trace_str(__entry->str_t), dev_name(__entry->hba->dev), __print_hex(__entry->hdr, sizeof(__entry->hdr)), show_ufs_cmd_trace_tsf(__entry->tsf_t), __print_hex(__entry->tsf, sizeof(__entry->tsf)) @@ -375,22 +378,22 @@ TRACE_EVENT(ufshcd_upiu, TRACE_EVENT(ufshcd_exception_event, - TP_PROTO(const char *dev_name, u16 status), + TP_PROTO(struct ufs_hba *hba, u16 status), - TP_ARGS(dev_name, status), + TP_ARGS(hba, status), TP_STRUCT__entry( - __string(dev_name, dev_name) + __field(struct ufs_hba *, hba) __field(u16, status) ), TP_fast_assign( - __assign_str(dev_name); + __entry->hba = hba; __entry->status = status; ), TP_printk("%s: status 0x%x", - __get_str(dev_name), __entry->status + dev_name(__entry->hba->dev), __entry->status ) ); diff --git a/drivers/ufs/core/ufshcd-priv.h b/drivers/ufs/core/ufshcd-priv.h index 786f20ef2238..10b4a19a70f1 100644 --- a/drivers/ufs/core/ufshcd-priv.h +++ b/drivers/ufs/core/ufshcd-priv.h @@ -117,11 +117,12 @@ static inline u32 ufshcd_vops_get_ufs_hci_version(struct ufs_hba *hba) return ufshcd_readl(hba, REG_UFS_VERSION); } -static inline int ufshcd_vops_clk_scale_notify(struct ufs_hba *hba, - bool up, enum ufs_notify_change_status status) +static inline int ufshcd_vops_clk_scale_notify(struct ufs_hba *hba, bool up, + unsigned long target_freq, + enum ufs_notify_change_status status) { if (hba->vops && hba->vops->clk_scale_notify) - return hba->vops->clk_scale_notify(hba, up, status); + return hba->vops->clk_scale_notify(hba, up, target_freq, status); return 0; } @@ -159,9 +160,9 @@ static inline int ufshcd_vops_link_startup_notify(struct ufs_hba *hba, } static inline int ufshcd_vops_pwr_change_notify(struct ufs_hba *hba, - enum ufs_notify_change_status status, - struct ufs_pa_layer_attr *dev_max_params, - struct ufs_pa_layer_attr *dev_req_params) + enum ufs_notify_change_status status, + const struct ufs_pa_layer_attr *dev_max_params, + struct ufs_pa_layer_attr *dev_req_params) { if (hba->vops && hba->vops->pwr_change_notify) return hba->vops->pwr_change_notify(hba, status, @@ -270,6 +271,14 @@ static inline int ufshcd_mcq_vops_config_esi(struct ufs_hba *hba) return -EOPNOTSUPP; } +static inline u32 ufshcd_vops_freq_to_gear_speed(struct ufs_hba *hba, unsigned long freq) +{ + if (hba->vops && hba->vops->freq_to_gear_speed) + return hba->vops->freq_to_gear_speed(hba, freq); + + return 0; +} + extern const struct ufs_pm_lvl_states ufs_pm_lvl_states[]; /** diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index cd404ade48dc..0534390c2a35 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -266,7 +266,7 @@ static bool ufshcd_has_pending_tasks(struct ufs_hba *hba) static bool ufshcd_is_ufs_dev_busy(struct ufs_hba *hba) { - return hba->outstanding_reqs || ufshcd_has_pending_tasks(hba); + return scsi_host_busy(hba->host) || ufshcd_has_pending_tasks(hba); } static const struct ufs_dev_quirk ufs_fixups[] = { @@ -369,7 +369,7 @@ static void ufshcd_add_cmd_upiu_trace(struct ufs_hba *hba, unsigned int tag, else header = &hba->lrb[tag].ucd_rsp_ptr->header; - trace_ufshcd_upiu(dev_name(hba->dev), str_t, header, &rq->sc.cdb, + trace_ufshcd_upiu(hba, str_t, header, &rq->sc.cdb, UFS_TSF_CDB); } @@ -380,7 +380,7 @@ static void ufshcd_add_query_upiu_trace(struct ufs_hba *hba, if (!trace_ufshcd_upiu_enabled()) return; - trace_ufshcd_upiu(dev_name(hba->dev), str_t, &rq_rsp->header, + trace_ufshcd_upiu(hba, str_t, &rq_rsp->header, &rq_rsp->qr, UFS_TSF_OSF); } @@ -393,12 +393,12 @@ static void ufshcd_add_tm_upiu_trace(struct ufs_hba *hba, unsigned int tag, return; if (str_t == UFS_TM_SEND) - trace_ufshcd_upiu(dev_name(hba->dev), str_t, + trace_ufshcd_upiu(hba, str_t, &descp->upiu_req.req_header, &descp->upiu_req.input_param1, UFS_TSF_TM_INPUT); else - trace_ufshcd_upiu(dev_name(hba->dev), str_t, + trace_ufshcd_upiu(hba, str_t, &descp->upiu_rsp.rsp_header, &descp->upiu_rsp.output_param1, UFS_TSF_TM_OUTPUT); @@ -418,7 +418,7 @@ static void ufshcd_add_uic_command_trace(struct ufs_hba *hba, else cmd = ufshcd_readl(hba, REG_UIC_COMMAND); - trace_ufshcd_uic_command(dev_name(hba->dev), str_t, cmd, + trace_ufshcd_uic_command(hba, str_t, cmd, ufshcd_readl(hba, REG_UIC_COMMAND_ARG_1), ufshcd_readl(hba, REG_UIC_COMMAND_ARG_2), ufshcd_readl(hba, REG_UIC_COMMAND_ARG_3)); @@ -473,7 +473,7 @@ static void ufshcd_add_command_trace(struct ufs_hba *hba, unsigned int tag, } else { doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL); } - trace_ufshcd_command(cmd->device, str_t, tag, doorbell, hwq_id, + trace_ufshcd_command(cmd->device, hba, str_t, tag, doorbell, hwq_id, transfer_len, intr, lba, opcode, group_id); } @@ -628,8 +628,8 @@ static void ufshcd_print_host_state(struct ufs_hba *hba) const struct scsi_device *sdev_ufs = hba->ufs_device_wlun; dev_err(hba->dev, "UFS Host state=%d\n", hba->ufshcd_state); - dev_err(hba->dev, "outstanding reqs=0x%lx tasks=0x%lx\n", - hba->outstanding_reqs, hba->outstanding_tasks); + dev_err(hba->dev, "%d outstanding reqs, tasks=0x%lx\n", + scsi_host_busy(hba->host), hba->outstanding_tasks); dev_err(hba->dev, "saved_err=0x%x, saved_uic_err=0x%x\n", hba->saved_err, hba->saved_uic_err); dev_err(hba->dev, "Device power mode=%d, UIC link state=%d\n", @@ -1063,7 +1063,7 @@ static int ufshcd_set_clk_freq(struct ufs_hba *hba, bool scale_up) clki->max_freq, ret); break; } - trace_ufshcd_clk_scaling(dev_name(hba->dev), + trace_ufshcd_clk_scaling(hba, "scaled up", clki->name, clki->curr_freq, clki->max_freq); @@ -1081,7 +1081,7 @@ static int ufshcd_set_clk_freq(struct ufs_hba *hba, bool scale_up) clki->min_freq, ret); break; } - trace_ufshcd_clk_scaling(dev_name(hba->dev), + trace_ufshcd_clk_scaling(hba, "scaled down", clki->name, clki->curr_freq, clki->min_freq); @@ -1122,7 +1122,7 @@ int ufshcd_opp_config_clks(struct device *dev, struct opp_table *opp_table, return ret; } - trace_ufshcd_clk_scaling(dev_name(dev), + trace_ufshcd_clk_scaling(hba, (scaling_down ? "scaled down" : "scaled up"), clki->name, hba->clk_scaling.target_freq, freq); } @@ -1162,7 +1162,7 @@ static int ufshcd_scale_clks(struct ufs_hba *hba, unsigned long freq, int ret = 0; ktime_t start = ktime_get(); - ret = ufshcd_vops_clk_scale_notify(hba, scale_up, PRE_CHANGE); + ret = ufshcd_vops_clk_scale_notify(hba, scale_up, freq, PRE_CHANGE); if (ret) goto out; @@ -1173,7 +1173,7 @@ static int ufshcd_scale_clks(struct ufs_hba *hba, unsigned long freq, if (ret) goto out; - ret = ufshcd_vops_clk_scale_notify(hba, scale_up, POST_CHANGE); + ret = ufshcd_vops_clk_scale_notify(hba, scale_up, freq, POST_CHANGE); if (ret) { if (hba->use_pm_opp) ufshcd_opp_set_rate(hba, @@ -1186,7 +1186,7 @@ static int ufshcd_scale_clks(struct ufs_hba *hba, unsigned long freq, ufshcd_pm_qos_update(hba, scale_up); out: - trace_ufshcd_profile_clk_scaling(dev_name(hba->dev), + trace_ufshcd_profile_clk_scaling(hba, (scale_up ? "up" : "down"), ktime_to_us(ktime_sub(ktime_get(), start)), ret); return ret; @@ -1313,16 +1313,26 @@ static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba, /** * ufshcd_scale_gear - scale up/down UFS gear * @hba: per adapter instance + * @target_gear: target gear to scale to * @scale_up: True for scaling up gear and false for scaling down * * Return: 0 for success; -EBUSY if scaling can't happen at this time; * non-zero for any other errors. */ -static int ufshcd_scale_gear(struct ufs_hba *hba, bool scale_up) +static int ufshcd_scale_gear(struct ufs_hba *hba, u32 target_gear, bool scale_up) { int ret = 0; struct ufs_pa_layer_attr new_pwr_info; + if (target_gear) { + new_pwr_info = hba->pwr_info; + new_pwr_info.gear_tx = target_gear; + new_pwr_info.gear_rx = target_gear; + + goto config_pwr_mode; + } + + /* Legacy gear scaling, in case vops_freq_to_gear_speed() is not implemented */ if (scale_up) { memcpy(&new_pwr_info, &hba->clk_scaling.saved_pwr_info, sizeof(struct ufs_pa_layer_attr)); @@ -1343,6 +1353,7 @@ static int ufshcd_scale_gear(struct ufs_hba *hba, bool scale_up) } } +config_pwr_mode: /* check if the power mode needs to be changed or not? */ ret = ufshcd_config_pwr_mode(hba, &new_pwr_info); if (ret) @@ -1387,13 +1398,13 @@ static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba, u64 timeout_us) return ret; } -static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, int err, bool scale_up) +static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, int err) { up_write(&hba->clk_scaling_lock); - /* Enable Write Booster if we have scaled up else disable it */ + /* Enable Write Booster if current gear requires it else disable it */ if (ufshcd_enable_wb_if_scaling_up(hba) && !err) - ufshcd_wb_toggle(hba, scale_up); + ufshcd_wb_toggle(hba, hba->pwr_info.gear_rx >= hba->clk_scaling.wb_gear); mutex_unlock(&hba->wb_mutex); @@ -1413,15 +1424,19 @@ static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, int err, bool sc static int ufshcd_devfreq_scale(struct ufs_hba *hba, unsigned long freq, bool scale_up) { + u32 old_gear = hba->pwr_info.gear_rx; + u32 new_gear = 0; int ret = 0; + new_gear = ufshcd_vops_freq_to_gear_speed(hba, freq); + ret = ufshcd_clock_scaling_prepare(hba, 1 * USEC_PER_SEC); if (ret) return ret; /* scale down the gear before scaling down clocks */ if (!scale_up) { - ret = ufshcd_scale_gear(hba, false); + ret = ufshcd_scale_gear(hba, new_gear, false); if (ret) goto out_unprepare; } @@ -1429,13 +1444,13 @@ static int ufshcd_devfreq_scale(struct ufs_hba *hba, unsigned long freq, ret = ufshcd_scale_clks(hba, freq, scale_up); if (ret) { if (!scale_up) - ufshcd_scale_gear(hba, true); + ufshcd_scale_gear(hba, old_gear, true); goto out_unprepare; } /* scale up the gear after scaling up clocks */ if (scale_up) { - ret = ufshcd_scale_gear(hba, true); + ret = ufshcd_scale_gear(hba, new_gear, true); if (ret) { ufshcd_scale_clks(hba, hba->devfreq->previous_freq, false); @@ -1444,7 +1459,7 @@ static int ufshcd_devfreq_scale(struct ufs_hba *hba, unsigned long freq, } out_unprepare: - ufshcd_clock_scaling_unprepare(hba, ret, scale_up); + ufshcd_clock_scaling_unprepare(hba, ret); return ret; } @@ -1548,7 +1563,7 @@ static int ufshcd_devfreq_target(struct device *dev, if (!ret) hba->clk_scaling.target_freq = *freq; - trace_ufshcd_profile_clk_scaling(dev_name(hba->dev), + trace_ufshcd_profile_clk_scaling(hba, (scale_up ? "up" : "down"), ktime_to_us(ktime_sub(ktime_get(), start)), ret); @@ -1720,6 +1735,8 @@ static ssize_t ufshcd_clkscale_enable_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct ufs_hba *hba = dev_get_drvdata(dev); + struct ufs_clk_info *clki; + unsigned long freq; u32 value; int err = 0; @@ -1743,14 +1760,25 @@ static ssize_t ufshcd_clkscale_enable_store(struct device *dev, if (value) { ufshcd_resume_clkscaling(hba); - } else { - ufshcd_suspend_clkscaling(hba); - err = ufshcd_devfreq_scale(hba, ULONG_MAX, true); - if (err) - dev_err(hba->dev, "%s: failed to scale clocks up %d\n", - __func__, err); + goto out_rel; } + clki = list_first_entry(&hba->clk_list_head, struct ufs_clk_info, list); + freq = clki->max_freq; + + ufshcd_suspend_clkscaling(hba); + + if (!ufshcd_is_devfreq_scaling_required(hba, freq, true)) + goto out_rel; + + err = ufshcd_devfreq_scale(hba, freq, true); + if (err) + dev_err(hba->dev, "%s: failed to scale clocks up %d\n", + __func__, err); + else + hba->clk_scaling.target_freq = freq; + +out_rel: ufshcd_release(hba); ufshcd_rpm_put_sync(hba); out: @@ -1783,6 +1811,10 @@ static void ufshcd_init_clk_scaling(struct ufs_hba *hba) if (!hba->clk_scaling.min_gear) hba->clk_scaling.min_gear = UFS_HS_G1; + if (!hba->clk_scaling.wb_gear) + /* Use intermediate gear speed HS_G3 as the default wb_gear */ + hba->clk_scaling.wb_gear = UFS_HS_G3; + INIT_WORK(&hba->clk_scaling.suspend_work, ufshcd_clk_scaling_suspend_work); INIT_WORK(&hba->clk_scaling.resume_work, @@ -1881,7 +1913,7 @@ void ufshcd_hold(struct ufs_hba *hba) case REQ_CLKS_OFF: if (cancel_delayed_work(&hba->clk_gating.gate_work)) { hba->clk_gating.state = CLKS_ON; - trace_ufshcd_clk_gating(dev_name(hba->dev), + trace_ufshcd_clk_gating(hba, hba->clk_gating.state); break; } @@ -1893,7 +1925,7 @@ void ufshcd_hold(struct ufs_hba *hba) fallthrough; case CLKS_OFF: hba->clk_gating.state = REQ_CLKS_ON; - trace_ufshcd_clk_gating(dev_name(hba->dev), + trace_ufshcd_clk_gating(hba, hba->clk_gating.state); queue_work(hba->clk_gating.clk_gating_workq, &hba->clk_gating.ungate_work); @@ -1933,7 +1965,7 @@ static void ufshcd_gate_work(struct work_struct *work) if (hba->clk_gating.is_suspended || hba->clk_gating.state != REQ_CLKS_OFF) { hba->clk_gating.state = CLKS_ON; - trace_ufshcd_clk_gating(dev_name(hba->dev), + trace_ufshcd_clk_gating(hba, hba->clk_gating.state); return; } @@ -1955,7 +1987,7 @@ static void ufshcd_gate_work(struct work_struct *work) hba->clk_gating.state = CLKS_ON; dev_err(hba->dev, "%s: hibern8 enter failed %d\n", __func__, ret); - trace_ufshcd_clk_gating(dev_name(hba->dev), + trace_ufshcd_clk_gating(hba, hba->clk_gating.state); return; } @@ -1980,7 +2012,7 @@ static void ufshcd_gate_work(struct work_struct *work) guard(spinlock_irqsave)(&hba->clk_gating.lock); if (hba->clk_gating.state == REQ_CLKS_OFF) { hba->clk_gating.state = CLKS_OFF; - trace_ufshcd_clk_gating(dev_name(hba->dev), + trace_ufshcd_clk_gating(hba, hba->clk_gating.state); } } @@ -2006,7 +2038,7 @@ static void __ufshcd_release(struct ufs_hba *hba) } hba->clk_gating.state = REQ_CLKS_OFF; - trace_ufshcd_clk_gating(dev_name(hba->dev), hba->clk_gating.state); + trace_ufshcd_clk_gating(hba, hba->clk_gating.state); queue_delayed_work(hba->clk_gating.clk_gating_workq, &hba->clk_gating.gate_work, msecs_to_jiffies(hba->clk_gating.delay_ms)); @@ -2120,8 +2152,6 @@ static void ufshcd_init_clk_gating(struct ufs_hba *hba) INIT_DELAYED_WORK(&hba->clk_gating.gate_work, ufshcd_gate_work); INIT_WORK(&hba->clk_gating.ungate_work, ufshcd_ungate_work); - spin_lock_init(&hba->clk_gating.lock); - hba->clk_gating.clk_gating_workq = alloc_ordered_workqueue( "ufs_clk_gating_%d", WQ_MEM_RECLAIM | WQ_HIGHPRI, hba->host->host_no); @@ -3106,8 +3136,13 @@ ufshcd_dev_cmd_completion(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) case UPIU_TRANSACTION_QUERY_RSP: { u8 response = lrbp->ucd_rsp_ptr->header.response; - if (response == 0) + if (response == 0) { err = ufshcd_copy_query_response(hba, lrbp); + } else { + err = -EINVAL; + dev_err(hba->dev, "%s: unexpected response in Query RSP: %x\n", + __func__, response); + } break; } case UPIU_TRANSACTION_REJECT_UPIU: @@ -4002,7 +4037,7 @@ static int ufshcd_dme_link_startup(struct ufs_hba *hba) * * Return: 0 on success, non-zero value on failure. */ -static int ufshcd_dme_reset(struct ufs_hba *hba) +int ufshcd_dme_reset(struct ufs_hba *hba) { struct uic_command uic_cmd = { .command = UIC_CMD_DME_RESET, @@ -4016,6 +4051,7 @@ static int ufshcd_dme_reset(struct ufs_hba *hba) return ret; } +EXPORT_SYMBOL_GPL(ufshcd_dme_reset); int ufshcd_dme_configure_adapt(struct ufs_hba *hba, int agreed_gear, @@ -4041,7 +4077,7 @@ EXPORT_SYMBOL_GPL(ufshcd_dme_configure_adapt); * * Return: 0 on success, non-zero value on failure. */ -static int ufshcd_dme_enable(struct ufs_hba *hba) +int ufshcd_dme_enable(struct ufs_hba *hba) { struct uic_command uic_cmd = { .command = UIC_CMD_DME_ENABLE, @@ -4055,6 +4091,7 @@ static int ufshcd_dme_enable(struct ufs_hba *hba) return ret; } +EXPORT_SYMBOL_GPL(ufshcd_dme_enable); static inline void ufshcd_add_delay_before_dme_cmd(struct ufs_hba *hba) { @@ -4419,7 +4456,7 @@ int ufshcd_uic_hibern8_enter(struct ufs_hba *hba) ufshcd_vops_hibern8_notify(hba, UIC_CMD_DME_HIBER_ENTER, PRE_CHANGE); ret = ufshcd_uic_pwr_ctrl(hba, &uic_cmd); - trace_ufshcd_profile_hibern8(dev_name(hba->dev), "enter", + trace_ufshcd_profile_hibern8(hba, "enter", ktime_to_us(ktime_sub(ktime_get(), start)), ret); if (ret) @@ -4444,7 +4481,7 @@ int ufshcd_uic_hibern8_exit(struct ufs_hba *hba) ufshcd_vops_hibern8_notify(hba, UIC_CMD_DME_HIBER_EXIT, PRE_CHANGE); ret = ufshcd_uic_pwr_ctrl(hba, &uic_cmd); - trace_ufshcd_profile_hibern8(dev_name(hba->dev), "exit", + trace_ufshcd_profile_hibern8(hba, "exit", ktime_to_us(ktime_sub(ktime_get(), start)), ret); if (ret) { @@ -5805,7 +5842,7 @@ static int ufshcd_enable_auto_bkops(struct ufs_hba *hba) } hba->auto_bkops_enabled = true; - trace_ufshcd_auto_bkops_state(dev_name(hba->dev), "Enabled"); + trace_ufshcd_auto_bkops_state(hba, "Enabled"); /* No need of URGENT_BKOPS exception from the device */ err = ufshcd_disable_ee(hba, MASK_EE_URGENT_BKOPS); @@ -5856,7 +5893,7 @@ static int ufshcd_disable_auto_bkops(struct ufs_hba *hba) } hba->auto_bkops_enabled = false; - trace_ufshcd_auto_bkops_state(dev_name(hba->dev), "Disabled"); + trace_ufshcd_auto_bkops_state(hba, "Disabled"); hba->is_urgent_bkops_lvl_checked = false; out: return err; @@ -5976,24 +6013,6 @@ static void ufshcd_bkops_exception_event_handler(struct ufs_hba *hba) __func__, err); } -static void ufshcd_temp_exception_event_handler(struct ufs_hba *hba, u16 status) -{ - u32 value; - - if (ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_READ_ATTR, - QUERY_ATTR_IDN_CASE_ROUGH_TEMP, 0, 0, &value)) - return; - - dev_info(hba->dev, "exception Tcase %d\n", value - 80); - - ufs_hwmon_notify_event(hba, status & MASK_EE_URGENT_TEMP); - - /* - * A placeholder for the platform vendors to add whatever additional - * steps required - */ -} - static int __ufshcd_wb_toggle(struct ufs_hba *hba, bool set, enum flag_idn idn) { u8 index; @@ -6208,13 +6227,18 @@ static void ufshcd_exception_event_handler(struct work_struct *work) return; } - trace_ufshcd_exception_event(dev_name(hba->dev), status); + trace_ufshcd_exception_event(hba, status); if (status & hba->ee_drv_mask & MASK_EE_URGENT_BKOPS) ufshcd_bkops_exception_event_handler(hba); if (status & hba->ee_drv_mask & MASK_EE_URGENT_TEMP) - ufshcd_temp_exception_event_handler(hba, status); + ufs_hwmon_notify_event(hba, status & MASK_EE_URGENT_TEMP); + + if (status & hba->ee_drv_mask & MASK_EE_HEALTH_CRITICAL) { + hba->critical_health_count++; + sysfs_notify(&hba->dev->kobj, NULL, "critical_health"); + } ufs_debugfs_exception_event(hba, status); } @@ -7667,7 +7691,7 @@ static void ufshcd_process_probe_result(struct ufs_hba *hba, hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL; spin_unlock_irqrestore(hba->host->host_lock, flags); - trace_ufshcd_init(dev_name(hba->dev), ret, + trace_ufshcd_init(hba, ret, ktime_to_us(ktime_sub(ktime_get(), probe_start)), hba->curr_dev_pwr_mode, hba->uic_link_state); } @@ -8308,6 +8332,11 @@ static int ufs_get_device_desc(struct ufs_hba *hba) ufshcd_temp_notif_probe(hba, desc_buf); + if (dev_info->wspecversion >= 0x410) { + hba->critical_health_count = 0; + ufshcd_enable_ee(hba, MASK_EE_HEALTH_CRITICAL); + } + ufs_init_rtc(hba, desc_buf); /* @@ -8897,7 +8926,7 @@ static enum scsi_timeout_action ufshcd_eh_timed_out(struct scsi_cmnd *scmd) dev_info(hba->dev, "%s() finished; outstanding_tasks = %#lx.\n", __func__, hba->outstanding_tasks); - return hba->outstanding_reqs ? SCSI_EH_RESET_TIMER : SCSI_EH_DONE; + return scsi_host_busy(hba->host) ? SCSI_EH_RESET_TIMER : SCSI_EH_DONE; } static const struct attribute_group *ufshcd_driver_groups[] = { @@ -9160,15 +9189,15 @@ static int ufshcd_setup_clocks(struct ufs_hba *hba, bool on) if (!IS_ERR_OR_NULL(clki->clk) && clki->enabled) clk_disable_unprepare(clki->clk); } - } else if (!ret && on) { + } else if (!ret && on && hba->clk_gating.is_initialized) { scoped_guard(spinlock_irqsave, &hba->clk_gating.lock) hba->clk_gating.state = CLKS_ON; - trace_ufshcd_clk_gating(dev_name(hba->dev), + trace_ufshcd_clk_gating(hba, hba->clk_gating.state); } if (clk_state_changed) - trace_ufshcd_profile_clk_gating(dev_name(hba->dev), + trace_ufshcd_profile_clk_gating(hba, (on ? "on" : "off"), ktime_to_us(ktime_sub(ktime_get(), start)), ret); return ret; @@ -9868,7 +9897,7 @@ static int ufshcd_wl_runtime_suspend(struct device *dev) if (ret) dev_err(&sdev->sdev_gendev, "%s failed: %d\n", __func__, ret); - trace_ufshcd_wl_runtime_suspend(dev_name(dev), ret, + trace_ufshcd_wl_runtime_suspend(hba, ret, ktime_to_us(ktime_sub(ktime_get(), start)), hba->curr_dev_pwr_mode, hba->uic_link_state); @@ -9888,7 +9917,7 @@ static int ufshcd_wl_runtime_resume(struct device *dev) if (ret) dev_err(&sdev->sdev_gendev, "%s failed: %d\n", __func__, ret); - trace_ufshcd_wl_runtime_resume(dev_name(dev), ret, + trace_ufshcd_wl_runtime_resume(hba, ret, ktime_to_us(ktime_sub(ktime_get(), start)), hba->curr_dev_pwr_mode, hba->uic_link_state); @@ -9920,7 +9949,7 @@ static int ufshcd_wl_suspend(struct device *dev) out: if (!ret) hba->is_sys_suspended = true; - trace_ufshcd_wl_suspend(dev_name(dev), ret, + trace_ufshcd_wl_suspend(hba, ret, ktime_to_us(ktime_sub(ktime_get(), start)), hba->curr_dev_pwr_mode, hba->uic_link_state); @@ -9943,7 +9972,7 @@ static int ufshcd_wl_resume(struct device *dev) if (ret) dev_err(&sdev->sdev_gendev, "%s failed: %d\n", __func__, ret); out: - trace_ufshcd_wl_resume(dev_name(dev), ret, + trace_ufshcd_wl_resume(hba, ret, ktime_to_us(ktime_sub(ktime_get(), start)), hba->curr_dev_pwr_mode, hba->uic_link_state); if (!ret) @@ -9981,7 +10010,7 @@ static int ufshcd_suspend(struct ufs_hba *hba) } if (ufshcd_is_clkgating_allowed(hba)) { hba->clk_gating.state = CLKS_OFF; - trace_ufshcd_clk_gating(dev_name(hba->dev), + trace_ufshcd_clk_gating(hba, hba->clk_gating.state); } @@ -10054,7 +10083,7 @@ int ufshcd_system_suspend(struct device *dev) ret = ufshcd_suspend(hba); out: - trace_ufshcd_system_suspend(dev_name(hba->dev), ret, + trace_ufshcd_system_suspend(hba, ret, ktime_to_us(ktime_sub(ktime_get(), start)), hba->curr_dev_pwr_mode, hba->uic_link_state); return ret; @@ -10082,7 +10111,7 @@ int ufshcd_system_resume(struct device *dev) ret = ufshcd_resume(hba); out: - trace_ufshcd_system_resume(dev_name(hba->dev), ret, + trace_ufshcd_system_resume(hba, ret, ktime_to_us(ktime_sub(ktime_get(), start)), hba->curr_dev_pwr_mode, hba->uic_link_state); @@ -10108,7 +10137,7 @@ int ufshcd_runtime_suspend(struct device *dev) ret = ufshcd_suspend(hba); - trace_ufshcd_runtime_suspend(dev_name(hba->dev), ret, + trace_ufshcd_runtime_suspend(hba, ret, ktime_to_us(ktime_sub(ktime_get(), start)), hba->curr_dev_pwr_mode, hba->uic_link_state); return ret; @@ -10135,7 +10164,7 @@ int ufshcd_runtime_resume(struct device *dev) ret = ufshcd_resume(hba); - trace_ufshcd_runtime_resume(dev_name(hba->dev), ret, + trace_ufshcd_runtime_resume(hba, ret, ktime_to_us(ktime_sub(ktime_get(), start)), hba->curr_dev_pwr_mode, hba->uic_link_state); return ret; @@ -10246,16 +10275,6 @@ int ufshcd_system_thaw(struct device *dev) EXPORT_SYMBOL_GPL(ufshcd_system_thaw); #endif /* CONFIG_PM_SLEEP */ -/** - * ufshcd_dealloc_host - deallocate Host Bus Adapter (HBA) - * @hba: pointer to Host Bus Adapter (HBA) - */ -void ufshcd_dealloc_host(struct ufs_hba *hba) -{ - scsi_host_put(hba->host); -} -EXPORT_SYMBOL_GPL(ufshcd_dealloc_host); - /** * ufshcd_set_dma_mask - Set dma mask based on the controller * addressing capability @@ -10274,12 +10293,26 @@ static int ufshcd_set_dma_mask(struct ufs_hba *hba) return dma_set_mask_and_coherent(hba->dev, DMA_BIT_MASK(32)); } +/** + * ufshcd_devres_release - devres cleanup handler, invoked during release of + * hba->dev + * @host: pointer to SCSI host + */ +static void ufshcd_devres_release(void *host) +{ + scsi_host_put(host); +} + /** * ufshcd_alloc_host - allocate Host Bus Adapter (HBA) * @dev: pointer to device handle * @hba_handle: driver private handle * * Return: 0 on success, non-zero value on failure. + * + * NOTE: There is no corresponding ufshcd_dealloc_host() because this function + * keeps track of its allocations using devres and deallocates everything on + * device removal automatically. */ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle) { @@ -10301,6 +10334,13 @@ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle) err = -ENOMEM; goto out_error; } + + err = devm_add_action_or_reset(dev, ufshcd_devres_release, + host); + if (err) + return dev_err_probe(dev, err, + "failed to add ufshcd dealloc action\n"); + host->nr_maps = HCTX_TYPE_POLL + 1; hba = shost_priv(host); hba->host = host; @@ -10429,6 +10469,27 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) hba->irq = irq; hba->vps = &ufs_hba_vps; + /* + * Initialize clk_gating.lock early since it is being used in + * ufshcd_setup_clocks() + */ + spin_lock_init(&hba->clk_gating.lock); + + /* + * Set the default power management level for runtime and system PM. + * Host controller drivers can override them in their + * 'ufs_hba_variant_ops::init' callback. + * + * Default power saving mode is to keep UFS link in Hibern8 state + * and UFS device in sleep state. + */ + hba->rpm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state( + UFS_SLEEP_PWR_MODE, + UIC_LINK_HIBERN8_STATE); + hba->spm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state( + UFS_SLEEP_PWR_MODE, + UIC_LINK_HIBERN8_STATE); + err = ufshcd_hba_init(hba); if (err) goto out_error; @@ -10542,21 +10603,6 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) goto out_disable; } - /* - * Set the default power management level for runtime and system PM if - * not set by the host controller drivers. - * Default power saving mode is to keep UFS link in Hibern8 state - * and UFS device in sleep state. - */ - if (!hba->rpm_lvl) - hba->rpm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state( - UFS_SLEEP_PWR_MODE, - UIC_LINK_HIBERN8_STATE); - if (!hba->spm_lvl) - hba->spm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state( - UFS_SLEEP_PWR_MODE, - UIC_LINK_HIBERN8_STATE); - INIT_DELAYED_WORK(&hba->rpm_dev_flush_recheck_work, ufshcd_rpm_dev_flush_recheck_work); INIT_DELAYED_WORK(&hba->ufs_rtc_update_work, ufshcd_rtc_work); diff --git a/drivers/ufs/host/Kconfig b/drivers/ufs/host/Kconfig index 580c8d0bd8bb..191fbd799ec5 100644 --- a/drivers/ufs/host/Kconfig +++ b/drivers/ufs/host/Kconfig @@ -142,3 +142,15 @@ config SCSI_UFS_SPRD Select this if you have UFS controller on Unisoc chipset. If unsure, say N. + +config SCSI_UFS_ROCKCHIP + tristate "Rockchip UFS host controller driver" + depends on SCSI_UFSHCD_PLATFORM && (ARCH_ROCKCHIP || COMPILE_TEST) + help + This selects the Rockchip specific additions to UFSHCD platform driver. + UFS host on Rockchip needs some vendor specific configuration before + accessing the hardware which includes PHY configuration and vendor + specific registers. + + Select this if you have UFS controller on Rockchip chipset. + If unsure, say N. diff --git a/drivers/ufs/host/Makefile b/drivers/ufs/host/Makefile index 4573aead02eb..2f97feb5db3f 100644 --- a/drivers/ufs/host/Makefile +++ b/drivers/ufs/host/Makefile @@ -10,5 +10,6 @@ obj-$(CONFIG_SCSI_UFSHCD_PLATFORM) += ufshcd-pltfrm.o obj-$(CONFIG_SCSI_UFS_HISI) += ufs-hisi.o obj-$(CONFIG_SCSI_UFS_MEDIATEK) += ufs-mediatek.o obj-$(CONFIG_SCSI_UFS_RENESAS) += ufs-renesas.o +obj-$(CONFIG_SCSI_UFS_ROCKCHIP) += ufs-rockchip.o obj-$(CONFIG_SCSI_UFS_SPRD) += ufs-sprd.o obj-$(CONFIG_SCSI_UFS_TI_J721E) += ti-j721e-ufs.o diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c index 13dd5dfc03eb..cab40d0cf1d5 100644 --- a/drivers/ufs/host/ufs-exynos.c +++ b/drivers/ufs/host/ufs-exynos.c @@ -321,7 +321,7 @@ static int exynosauto_ufs_pre_pwr_change(struct exynos_ufs *ufs, } static int exynosauto_ufs_post_pwr_change(struct exynos_ufs *ufs, - struct ufs_pa_layer_attr *pwr) + const struct ufs_pa_layer_attr *pwr) { struct ufs_hba *hba = ufs->hba; u32 enabled_vh; @@ -396,7 +396,7 @@ static int exynos7_ufs_pre_pwr_change(struct exynos_ufs *ufs, } static int exynos7_ufs_post_pwr_change(struct exynos_ufs *ufs, - struct ufs_pa_layer_attr *pwr) + const struct ufs_pa_layer_attr *pwr) { struct ufs_hba *hba = ufs->hba; int lanes = max_t(u32, pwr->lane_rx, pwr->lane_tx); @@ -813,7 +813,7 @@ static u32 exynos_ufs_get_hs_gear(struct ufs_hba *hba) } static int exynos_ufs_pre_pwr_mode(struct ufs_hba *hba, - struct ufs_pa_layer_attr *dev_max_params, + const struct ufs_pa_layer_attr *dev_max_params, struct ufs_pa_layer_attr *dev_req_params) { struct exynos_ufs *ufs = ufshcd_get_variant(hba); @@ -865,7 +865,7 @@ static int exynos_ufs_pre_pwr_mode(struct ufs_hba *hba, #define PWR_MODE_STR_LEN 64 static int exynos_ufs_post_pwr_mode(struct ufs_hba *hba, - struct ufs_pa_layer_attr *pwr_req) + const struct ufs_pa_layer_attr *pwr_req) { struct exynos_ufs *ufs = ufshcd_get_variant(hba); struct phy *generic_phy = ufs->phy; @@ -1634,7 +1634,7 @@ static int exynos_ufs_link_startup_notify(struct ufs_hba *hba, static int exynos_ufs_pwr_change_notify(struct ufs_hba *hba, enum ufs_notify_change_status status, - struct ufs_pa_layer_attr *dev_max_params, + const struct ufs_pa_layer_attr *dev_max_params, struct ufs_pa_layer_attr *dev_req_params) { int ret = 0; diff --git a/drivers/ufs/host/ufs-exynos.h b/drivers/ufs/host/ufs-exynos.h index 9670dc138d1e..aac517276189 100644 --- a/drivers/ufs/host/ufs-exynos.h +++ b/drivers/ufs/host/ufs-exynos.h @@ -188,7 +188,7 @@ struct exynos_ufs_drv_data { int (*pre_pwr_change)(struct exynos_ufs *ufs, struct ufs_pa_layer_attr *pwr); int (*post_pwr_change)(struct exynos_ufs *ufs, - struct ufs_pa_layer_attr *pwr); + const struct ufs_pa_layer_attr *pwr); int (*pre_hce_enable)(struct exynos_ufs *ufs); int (*post_hce_enable)(struct exynos_ufs *ufs); }; diff --git a/drivers/ufs/host/ufs-hisi.c b/drivers/ufs/host/ufs-hisi.c index 6e6569de74d8..6f2e6bf31225 100644 --- a/drivers/ufs/host/ufs-hisi.c +++ b/drivers/ufs/host/ufs-hisi.c @@ -361,9 +361,9 @@ static void ufs_hisi_pwr_change_pre_change(struct ufs_hba *hba) } static int ufs_hisi_pwr_change_notify(struct ufs_hba *hba, - enum ufs_notify_change_status status, - struct ufs_pa_layer_attr *dev_max_params, - struct ufs_pa_layer_attr *dev_req_params) + enum ufs_notify_change_status status, + const struct ufs_pa_layer_attr *dev_max_params, + struct ufs_pa_layer_attr *dev_req_params) { struct ufs_host_params host_params; int ret = 0; diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index 135cd78109e2..182f58d0c9db 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -1081,8 +1081,8 @@ static bool ufs_mtk_pmc_via_fastauto(struct ufs_hba *hba, } static int ufs_mtk_pre_pwr_change(struct ufs_hba *hba, - struct ufs_pa_layer_attr *dev_max_params, - struct ufs_pa_layer_attr *dev_req_params) + const struct ufs_pa_layer_attr *dev_max_params, + struct ufs_pa_layer_attr *dev_req_params) { struct ufs_mtk_host *host = ufshcd_get_variant(hba); struct ufs_host_params host_params; @@ -1134,9 +1134,9 @@ static int ufs_mtk_pre_pwr_change(struct ufs_hba *hba, } static int ufs_mtk_pwr_change_notify(struct ufs_hba *hba, - enum ufs_notify_change_status stage, - struct ufs_pa_layer_attr *dev_max_params, - struct ufs_pa_layer_attr *dev_req_params) + enum ufs_notify_change_status stage, + const struct ufs_pa_layer_attr *dev_max_params, + struct ufs_pa_layer_attr *dev_req_params) { int ret = 0; @@ -1643,6 +1643,7 @@ static void ufs_mtk_clk_scale(struct ufs_hba *hba, bool scale_up) } static int ufs_mtk_clk_scale_notify(struct ufs_hba *hba, bool scale_up, + unsigned long target_freq, enum ufs_notify_change_status status) { if (!ufshcd_is_clkscaling_supported(hba)) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 23b9f6efa047..d03a07402223 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -15,6 +15,8 @@ #include <linux/platform_device.h> #include <linux/reset-controller.h> #include <linux/time.h> +#include <linux/unaligned.h> +#include <linux/units.h> #include <soc/qcom/ice.h> @@ -97,7 +99,7 @@ static const struct __ufs_qcom_bw_table { }; static void ufs_qcom_get_default_testbus_cfg(struct ufs_qcom_host *host); -static int ufs_qcom_set_core_clk_ctrl(struct ufs_hba *hba, bool is_scale_up); +static int ufs_qcom_set_core_clk_ctrl(struct ufs_hba *hba, unsigned long freq); static struct ufs_qcom_host *rcdev_to_ufs_host(struct reset_controller_dev *rcd) { @@ -105,6 +107,26 @@ static struct ufs_qcom_host *rcdev_to_ufs_host(struct reset_controller_dev *rcd) } #ifdef CONFIG_SCSI_UFS_CRYPTO +/** + * ufs_qcom_config_ice_allocator() - ICE core allocator configuration + * + * @host: pointer to qcom specific variant structure. + */ +static void ufs_qcom_config_ice_allocator(struct ufs_qcom_host *host) +{ + struct ufs_hba *hba = host->hba; + static const uint8_t val[4] = { NUM_RX_R1W0, NUM_TX_R0W1, NUM_RX_R1W1, NUM_TX_R1W1 }; + u32 config; + + if (!(host->caps & UFS_QCOM_CAP_ICE_CONFIG) || + !(host->hba->caps & UFSHCD_CAP_CRYPTO)) + return; + + config = get_unaligned_le32(val); + + ufshcd_writel(hba, ICE_ALLOCATOR_TYPE, REG_UFS_MEM_ICE_CONFIG); + ufshcd_writel(hba, config, REG_UFS_MEM_ICE_NUM_CORE); +} static inline void ufs_qcom_ice_enable(struct ufs_qcom_host *host) { @@ -248,6 +270,11 @@ static inline int ufs_qcom_ice_suspend(struct ufs_qcom_host *host) { return 0; } + +static void ufs_qcom_config_ice_allocator(struct ufs_qcom_host *host) +{ +} + #endif static void ufs_qcom_disable_lane_clks(struct ufs_qcom_host *host) @@ -496,6 +523,7 @@ static int ufs_qcom_hce_enable_notify(struct ufs_hba *hba, err = ufs_qcom_check_hibern8(hba); ufs_qcom_enable_hw_clk_gating(hba); ufs_qcom_ice_enable(host); + ufs_qcom_config_ice_allocator(host); break; default: dev_err(hba->dev, "%s: invalid status %d\n", __func__, status); @@ -509,16 +537,10 @@ static int ufs_qcom_hce_enable_notify(struct ufs_hba *hba, * ufs_qcom_cfg_timers - Configure ufs qcom cfg timers * * @hba: host controller instance - * @gear: Current operating gear - * @hs: current power mode - * @rate: current operating rate (A or B) - * @update_link_startup_timer: indicate if link_start ongoing * @is_pre_scale_up: flag to check if pre scale up condition. * Return: zero for success and non-zero in case of a failure. */ -static int ufs_qcom_cfg_timers(struct ufs_hba *hba, u32 gear, - u32 hs, u32 rate, bool update_link_startup_timer, - bool is_pre_scale_up) +static int ufs_qcom_cfg_timers(struct ufs_hba *hba, bool is_pre_scale_up) { struct ufs_qcom_host *host = ufshcd_get_variant(hba); struct ufs_clk_info *clki; @@ -534,11 +556,6 @@ static int ufs_qcom_cfg_timers(struct ufs_hba *hba, u32 gear, if (host->hw_ver.major < 4 && !ufshcd_is_intr_aggr_allowed(hba)) return 0; - if (gear == 0) { - dev_err(hba->dev, "%s: invalid gear = %d\n", __func__, gear); - return -EINVAL; - } - list_for_each_entry(clki, &hba->clk_list_head, list) { if (!strcmp(clki->name, "core_clk")) { if (is_pre_scale_up) @@ -574,14 +591,13 @@ static int ufs_qcom_link_startup_notify(struct ufs_hba *hba, switch (status) { case PRE_CHANGE: - if (ufs_qcom_cfg_timers(hba, UFS_PWM_G1, SLOWAUTO_MODE, - 0, true, false)) { + if (ufs_qcom_cfg_timers(hba, false)) { dev_err(hba->dev, "%s: ufs_qcom_cfg_timers() failed\n", __func__); return -EINVAL; } - err = ufs_qcom_set_core_clk_ctrl(hba, true); + err = ufs_qcom_set_core_clk_ctrl(hba, ULONG_MAX); if (err) dev_err(hba->dev, "cfg core clk ctrl failed\n"); /* @@ -780,7 +796,7 @@ static int ufs_qcom_icc_update_bw(struct ufs_qcom_host *host) static int ufs_qcom_pwr_change_notify(struct ufs_hba *hba, enum ufs_notify_change_status status, - struct ufs_pa_layer_attr *dev_max_params, + const struct ufs_pa_layer_attr *dev_max_params, struct ufs_pa_layer_attr *dev_req_params) { struct ufs_qcom_host *host = ufshcd_get_variant(hba); @@ -831,9 +847,7 @@ static int ufs_qcom_pwr_change_notify(struct ufs_hba *hba, } break; case POST_CHANGE: - if (ufs_qcom_cfg_timers(hba, dev_req_params->gear_rx, - dev_req_params->pwr_rx, - dev_req_params->hs_rate, false, false)) { + if (ufs_qcom_cfg_timers(hba, false)) { dev_err(hba->dev, "%s: ufs_qcom_cfg_timers() failed\n", __func__); /* @@ -989,6 +1003,14 @@ static void ufs_qcom_set_host_params(struct ufs_hba *hba) host_params->hs_tx_gear = host_params->hs_rx_gear = ufs_qcom_get_hs_gear(hba); } +static void ufs_qcom_set_host_caps(struct ufs_hba *hba) +{ + struct ufs_qcom_host *host = ufshcd_get_variant(hba); + + if (host->hw_ver.major >= 0x5) + host->caps |= UFS_QCOM_CAP_ICE_CONFIG; +} + static void ufs_qcom_set_caps(struct ufs_hba *hba) { hba->caps |= UFSHCD_CAP_CLK_GATING | UFSHCD_CAP_HIBERN8_WITH_CLK_GATING; @@ -997,6 +1019,8 @@ static void ufs_qcom_set_caps(struct ufs_hba *hba) hba->caps |= UFSHCD_CAP_WB_EN; hba->caps |= UFSHCD_CAP_AGGR_POWER_COLLAPSE; hba->caps |= UFSHCD_CAP_RPM_AUTOSUSPEND; + + ufs_qcom_set_host_caps(hba); } /** @@ -1292,7 +1316,7 @@ static int ufs_qcom_set_clk_40ns_cycles(struct ufs_hba *hba, return ufshcd_dme_set(hba, UIC_ARG_MIB(PA_VS_CORE_CLK_40NS_CYCLES), reg); } -static int ufs_qcom_set_core_clk_ctrl(struct ufs_hba *hba, bool is_scale_up) +static int ufs_qcom_set_core_clk_ctrl(struct ufs_hba *hba, unsigned long freq) { struct ufs_qcom_host *host = ufshcd_get_variant(hba); struct list_head *head = &hba->clk_list_head; @@ -1306,10 +1330,11 @@ static int ufs_qcom_set_core_clk_ctrl(struct ufs_hba *hba, bool is_scale_up) !strcmp(clki->name, "core_clk_unipro")) { if (!clki->max_freq) cycles_in_1us = 150; /* default for backwards compatibility */ - else if (is_scale_up) - cycles_in_1us = ceil(clki->max_freq, (1000 * 1000)); + else if (freq == ULONG_MAX) + cycles_in_1us = ceil(clki->max_freq, HZ_PER_MHZ); else - cycles_in_1us = ceil(clk_get_rate(clki->clk), (1000 * 1000)); + cycles_in_1us = ceil(freq, HZ_PER_MHZ); + break; } } @@ -1346,20 +1371,17 @@ static int ufs_qcom_set_core_clk_ctrl(struct ufs_hba *hba, bool is_scale_up) return ufs_qcom_set_clk_40ns_cycles(hba, cycles_in_1us); } -static int ufs_qcom_clk_scale_up_pre_change(struct ufs_hba *hba) +static int ufs_qcom_clk_scale_up_pre_change(struct ufs_hba *hba, unsigned long freq) { - struct ufs_qcom_host *host = ufshcd_get_variant(hba); - struct ufs_pa_layer_attr *attr = &host->dev_req_params; int ret; - ret = ufs_qcom_cfg_timers(hba, attr->gear_rx, attr->pwr_rx, - attr->hs_rate, false, true); + ret = ufs_qcom_cfg_timers(hba, true); if (ret) { dev_err(hba->dev, "%s ufs cfg timer failed\n", __func__); return ret; } /* set unipro core clock attributes and clear clock divider */ - return ufs_qcom_set_core_clk_ctrl(hba, true); + return ufs_qcom_set_core_clk_ctrl(hba, freq); } static int ufs_qcom_clk_scale_up_post_change(struct ufs_hba *hba) @@ -1388,14 +1410,15 @@ static int ufs_qcom_clk_scale_down_pre_change(struct ufs_hba *hba) return err; } -static int ufs_qcom_clk_scale_down_post_change(struct ufs_hba *hba) +static int ufs_qcom_clk_scale_down_post_change(struct ufs_hba *hba, unsigned long freq) { /* set unipro core clock attributes and clear clock divider */ - return ufs_qcom_set_core_clk_ctrl(hba, false); + return ufs_qcom_set_core_clk_ctrl(hba, freq); } -static int ufs_qcom_clk_scale_notify(struct ufs_hba *hba, - bool scale_up, enum ufs_notify_change_status status) +static int ufs_qcom_clk_scale_notify(struct ufs_hba *hba, bool scale_up, + unsigned long target_freq, + enum ufs_notify_change_status status) { struct ufs_qcom_host *host = ufshcd_get_variant(hba); int err; @@ -1409,7 +1432,7 @@ static int ufs_qcom_clk_scale_notify(struct ufs_hba *hba, if (err) return err; if (scale_up) - err = ufs_qcom_clk_scale_up_pre_change(hba); + err = ufs_qcom_clk_scale_up_pre_change(hba, target_freq); else err = ufs_qcom_clk_scale_down_pre_change(hba); @@ -1421,7 +1444,7 @@ static int ufs_qcom_clk_scale_notify(struct ufs_hba *hba, if (scale_up) err = ufs_qcom_clk_scale_up_post_change(hba); else - err = ufs_qcom_clk_scale_down_post_change(hba); + err = ufs_qcom_clk_scale_down_post_change(hba, target_freq); if (err) { @@ -1855,6 +1878,36 @@ static int ufs_qcom_config_esi(struct ufs_hba *hba) return ret; } +static u32 ufs_qcom_freq_to_gear_speed(struct ufs_hba *hba, unsigned long freq) +{ + u32 gear = 0; + + switch (freq) { + case 403000000: + gear = UFS_HS_G5; + break; + case 300000000: + gear = UFS_HS_G4; + break; + case 201500000: + gear = UFS_HS_G3; + break; + case 150000000: + case 100000000: + gear = UFS_HS_G2; + break; + case 75000000: + case 37500000: + gear = UFS_HS_G1; + break; + default: + dev_err(hba->dev, "%s: Unsupported clock freq : %lu\n", __func__, freq); + break; + } + + return gear; +} + /* * struct ufs_hba_qcom_vops - UFS QCOM specific variant operations * @@ -1883,6 +1936,7 @@ static const struct ufs_hba_variant_ops ufs_hba_qcom_vops = { .op_runtime_config = ufs_qcom_op_runtime_config, .get_outstanding_cqs = ufs_qcom_get_outstanding_cqs, .config_esi = ufs_qcom_config_esi, + .freq_to_gear_speed = ufs_qcom_freq_to_gear_speed, }; /** diff --git a/drivers/ufs/host/ufs-qcom.h b/drivers/ufs/host/ufs-qcom.h index 919f53682beb..d0e6ec9128e7 100644 --- a/drivers/ufs/host/ufs-qcom.h +++ b/drivers/ufs/host/ufs-qcom.h @@ -50,6 +50,9 @@ enum { */ UFS_AH8_CFG = 0xFC, + REG_UFS_MEM_ICE_CONFIG = 0x260C, + REG_UFS_MEM_ICE_NUM_CORE = 0x2664, + REG_UFS_CFG3 = 0x271C, REG_UFS_DEBUG_SPARE_CFG = 0x284C, @@ -110,6 +113,9 @@ enum { /* bit definition for UFS_UFS_TEST_BUS_CTRL_n */ #define TEST_BUS_SUB_SEL_MASK GENMASK(4, 0) /* All XXX_SEL fields are 5 bits wide */ +/* bit definition for UFS Shared ICE config */ +#define UFS_QCOM_CAP_ICE_CONFIG BIT(0) + #define REG_UFS_CFG2_CGC_EN_ALL (UAWM_HW_CGC_EN | UARM_HW_CGC_EN |\ TXUC_HW_CGC_EN | RXUC_HW_CGC_EN |\ DFC_HW_CGC_EN | TRLUT_HW_CGC_EN |\ @@ -135,6 +141,37 @@ enum { #define UNIPRO_CORE_CLK_FREQ_201_5_MHZ 202 #define UNIPRO_CORE_CLK_FREQ_403_MHZ 403 +/* ICE allocator type to share AES engines among TX stream and RX stream */ +#define ICE_ALLOCATOR_TYPE 2 + +/* + * Number of cores allocated for RX stream when Read data block received and + * Write data block is not in progress + */ +#define NUM_RX_R1W0 28 + +/* + * Number of cores allocated for TX stream when Device asked to send write + * data block and Read data block is not in progress + */ +#define NUM_TX_R0W1 28 + +/* + * Number of cores allocated for RX stream when Read data block received and + * Write data block is in progress + * OR + * Device asked to send write data block and Read data block is in progress + */ +#define NUM_RX_R1W1 15 + +/* + * Number of cores allocated for TX stream (UFS write) when Read data block + * received and Write data block is in progress + * OR + * Device asked to send write data block and Read data block is in progress + */ +#define NUM_TX_R1W1 13 + static inline void ufs_qcom_get_controller_revision(struct ufs_hba *hba, u8 *major, u16 *minor, u16 *step) @@ -196,7 +233,7 @@ struct ufs_qcom_host { #ifdef CONFIG_SCSI_UFS_CRYPTO struct qcom_ice *ice; #endif - + u32 caps; void __iomem *dev_ref_clk_ctrl_mmio; bool is_dev_ref_clk_enabled; struct ufs_hw_version hw_ver; diff --git a/drivers/ufs/host/ufs-rockchip.c b/drivers/ufs/host/ufs-rockchip.c new file mode 100644 index 000000000000..5b0ea9820767 --- /dev/null +++ b/drivers/ufs/host/ufs-rockchip.c @@ -0,0 +1,354 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Rockchip UFS Host Controller driver + * + * Copyright (C) 2025 Rockchip Electronics Co., Ltd. + */ + +#include <linux/clk.h> +#include <linux/gpio.h> +#include <linux/mfd/syscon.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/pm_domain.h> +#include <linux/pm_wakeup.h> +#include <linux/regmap.h> +#include <linux/reset.h> + +#include <ufs/ufshcd.h> +#include <ufs/unipro.h> +#include "ufshcd-pltfrm.h" +#include "ufs-rockchip.h" + +static int ufs_rockchip_hce_enable_notify(struct ufs_hba *hba, + enum ufs_notify_change_status status) +{ + int err = 0; + + if (status == POST_CHANGE) { + err = ufshcd_dme_reset(hba); + if (err) + return err; + + err = ufshcd_dme_enable(hba); + if (err) + return err; + + return ufshcd_vops_phy_initialization(hba); + } + + return 0; +} + +static void ufs_rockchip_set_pm_lvl(struct ufs_hba *hba) +{ + hba->rpm_lvl = UFS_PM_LVL_5; + hba->spm_lvl = UFS_PM_LVL_5; +} + +static int ufs_rockchip_rk3576_phy_init(struct ufs_hba *hba) +{ + struct ufs_rockchip_host *host = ufshcd_get_variant(hba); + + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(PA_LOCAL_TX_LCC_ENABLE, 0x0), 0x0); + /* enable the mphy DME_SET cfg */ + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(MPHY_CFG, 0x0), MPHY_CFG_ENABLE); + for (int i = 0; i < 2; i++) { + /* Configuration M - TX */ + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_TX_CLK_PRD, SEL_TX_LANE0 + i), 0x06); + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_TX_CLK_PRD_EN, SEL_TX_LANE0 + i), 0x02); + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_TX_LINERESET_VALUE, SEL_TX_LANE0 + i), 0x44); + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_TX_LINERESET_PVALUE1, SEL_TX_LANE0 + i), 0xe6); + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_TX_LINERESET_PVALUE2, SEL_TX_LANE0 + i), 0x07); + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_TX_TASE_VALUE, SEL_TX_LANE0 + i), 0x93); + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_TX_BASE_NVALUE, SEL_TX_LANE0 + i), 0xc9); + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_TX_POWER_SAVING_CTRL, SEL_TX_LANE0 + i), 0x00); + /* Configuration M - RX */ + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_RX_CLK_PRD, SEL_RX_LANE0 + i), 0x06); + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_RX_CLK_PRD_EN, SEL_RX_LANE0 + i), 0x00); + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_RX_LINERESET_VALUE, SEL_RX_LANE0 + i), 0x58); + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_RX_LINERESET_PVALUE1, SEL_RX_LANE0 + i), 0x8c); + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_RX_LINERESET_PVALUE2, SEL_RX_LANE0 + i), 0x02); + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_RX_LINERESET_OPTION, SEL_RX_LANE0 + i), 0xf6); + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_RX_POWER_SAVING_CTRL, SEL_RX_LANE0 + i), 0x69); + } + + /* disable the mphy DME_SET cfg */ + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(MPHY_CFG, 0x0), MPHY_CFG_DISABLE); + + ufs_sys_writel(host->mphy_base, 0x80, CMN_REG23); + ufs_sys_writel(host->mphy_base, 0xB5, TRSV0_REG14); + ufs_sys_writel(host->mphy_base, 0xB5, TRSV1_REG14); + + ufs_sys_writel(host->mphy_base, 0x03, TRSV0_REG15); + ufs_sys_writel(host->mphy_base, 0x03, TRSV1_REG15); + + ufs_sys_writel(host->mphy_base, 0x38, TRSV0_REG08); + ufs_sys_writel(host->mphy_base, 0x38, TRSV1_REG08); + + ufs_sys_writel(host->mphy_base, 0x50, TRSV0_REG29); + ufs_sys_writel(host->mphy_base, 0x50, TRSV1_REG29); + + ufs_sys_writel(host->mphy_base, 0x80, TRSV0_REG2E); + ufs_sys_writel(host->mphy_base, 0x80, TRSV1_REG2E); + + ufs_sys_writel(host->mphy_base, 0x18, TRSV0_REG3C); + ufs_sys_writel(host->mphy_base, 0x18, TRSV1_REG3C); + + ufs_sys_writel(host->mphy_base, 0x03, TRSV0_REG16); + ufs_sys_writel(host->mphy_base, 0x03, TRSV1_REG16); + + ufs_sys_writel(host->mphy_base, 0x20, TRSV0_REG17); + ufs_sys_writel(host->mphy_base, 0x20, TRSV1_REG17); + + ufs_sys_writel(host->mphy_base, 0xC0, TRSV0_REG18); + ufs_sys_writel(host->mphy_base, 0xC0, TRSV1_REG18); + + ufs_sys_writel(host->mphy_base, 0x03, CMN_REG25); + + ufs_sys_writel(host->mphy_base, 0x03, TRSV0_REG3D); + ufs_sys_writel(host->mphy_base, 0x03, TRSV1_REG3D); + + ufs_sys_writel(host->mphy_base, 0xC0, CMN_REG23); + udelay(1); + ufs_sys_writel(host->mphy_base, 0x00, CMN_REG23); + + usleep_range(200, 250); + /* start link up */ + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(MIB_T_DBG_CPORT_TX_ENDIAN, 0), 0x0); + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(MIB_T_DBG_CPORT_RX_ENDIAN, 0), 0x0); + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(N_DEVICEID, 0), 0x0); + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(N_DEVICEID_VALID, 0), 0x1); + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(T_PEERDEVICEID, 0), 0x1); + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(T_CONNECTIONSTATE, 0), 0x1); + + return 0; +} + +static int ufs_rockchip_common_init(struct ufs_hba *hba) +{ + struct device *dev = hba->dev; + struct platform_device *pdev = to_platform_device(dev); + struct ufs_rockchip_host *host; + int err; + + host = devm_kzalloc(dev, sizeof(*host), GFP_KERNEL); + if (!host) + return -ENOMEM; + + host->ufs_sys_ctrl = devm_platform_ioremap_resource_byname(pdev, "hci_grf"); + if (IS_ERR(host->ufs_sys_ctrl)) + return dev_err_probe(dev, PTR_ERR(host->ufs_sys_ctrl), + "Failed to map HCI system control registers\n"); + + host->ufs_phy_ctrl = devm_platform_ioremap_resource_byname(pdev, "mphy_grf"); + if (IS_ERR(host->ufs_phy_ctrl)) + return dev_err_probe(dev, PTR_ERR(host->ufs_phy_ctrl), + "Failed to map mphy system control registers\n"); + + host->mphy_base = devm_platform_ioremap_resource_byname(pdev, "mphy"); + if (IS_ERR(host->mphy_base)) + return dev_err_probe(dev, PTR_ERR(host->mphy_base), + "Failed to map mphy base registers\n"); + + host->rst = devm_reset_control_array_get_exclusive(dev); + if (IS_ERR(host->rst)) + return dev_err_probe(dev, PTR_ERR(host->rst), + "failed to get reset control\n"); + + reset_control_assert(host->rst); + udelay(1); + reset_control_deassert(host->rst); + + host->ref_out_clk = devm_clk_get_enabled(dev, "ref_out"); + if (IS_ERR(host->ref_out_clk)) + return dev_err_probe(dev, PTR_ERR(host->ref_out_clk), + "ref_out clock unavailable\n"); + + host->rst_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW); + if (IS_ERR(host->rst_gpio)) + return dev_err_probe(dev, PTR_ERR(host->rst_gpio), + "failed to get reset gpio\n"); + + err = devm_clk_bulk_get_all_enabled(dev, &host->clks); + if (err) + return dev_err_probe(dev, err, "failed to enable clocks\n"); + + host->hba = hba; + + ufshcd_set_variant(hba, host); + + return 0; +} + +static int ufs_rockchip_rk3576_init(struct ufs_hba *hba) +{ + struct device *dev = hba->dev; + int ret; + + hba->quirks = UFSHCD_QUIRK_SKIP_DEF_UNIPRO_TIMEOUT_SETTING; + + /* Enable BKOPS when suspend */ + hba->caps |= UFSHCD_CAP_AUTO_BKOPS_SUSPEND; + /* Enable putting device into deep sleep */ + hba->caps |= UFSHCD_CAP_DEEPSLEEP; + /* Enable devfreq of UFS */ + hba->caps |= UFSHCD_CAP_CLK_SCALING; + /* Enable WriteBooster */ + hba->caps |= UFSHCD_CAP_WB_EN; + + /* Set the default desired pm level in case no users set via sysfs */ + ufs_rockchip_set_pm_lvl(hba); + + ret = ufs_rockchip_common_init(hba); + if (ret) + return dev_err_probe(dev, ret, "ufs common init fail\n"); + + return 0; +} + +static int ufs_rockchip_device_reset(struct ufs_hba *hba) +{ + struct ufs_rockchip_host *host = ufshcd_get_variant(hba); + + gpiod_set_value_cansleep(host->rst_gpio, 1); + usleep_range(20, 25); + + gpiod_set_value_cansleep(host->rst_gpio, 0); + usleep_range(20, 25); + + return 0; +} + +static const struct ufs_hba_variant_ops ufs_hba_rk3576_vops = { + .name = "rk3576", + .init = ufs_rockchip_rk3576_init, + .device_reset = ufs_rockchip_device_reset, + .hce_enable_notify = ufs_rockchip_hce_enable_notify, + .phy_initialization = ufs_rockchip_rk3576_phy_init, +}; + +static const struct of_device_id ufs_rockchip_of_match[] = { + { .compatible = "rockchip,rk3576-ufshc", .data = &ufs_hba_rk3576_vops }, + { }, +}; +MODULE_DEVICE_TABLE(of, ufs_rockchip_of_match); + +static int ufs_rockchip_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + const struct ufs_hba_variant_ops *vops; + int err; + + vops = device_get_match_data(dev); + if (!vops) + return dev_err_probe(dev, -ENODATA, "ufs_hba_variant_ops not defined.\n"); + + err = ufshcd_pltfrm_init(pdev, vops); + if (err) + return dev_err_probe(dev, err, "ufshcd_pltfrm_init failed\n"); + + return 0; +} + +static void ufs_rockchip_remove(struct platform_device *pdev) +{ + ufshcd_pltfrm_remove(pdev); +} + +#ifdef CONFIG_PM +static int ufs_rockchip_runtime_suspend(struct device *dev) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + struct ufs_rockchip_host *host = ufshcd_get_variant(hba); + + clk_disable_unprepare(host->ref_out_clk); + + /* Do not power down the genpd if rpm_lvl is less than level 5 */ + dev_pm_genpd_rpm_always_on(dev, hba->rpm_lvl < UFS_PM_LVL_5 ? true : false); + + return ufshcd_runtime_suspend(dev); +} + +static int ufs_rockchip_runtime_resume(struct device *dev) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + struct ufs_rockchip_host *host = ufshcd_get_variant(hba); + int err; + + err = clk_prepare_enable(host->ref_out_clk); + if (err) { + dev_err(hba->dev, "failed to enable ref_out clock %d\n", err); + return err; + } + + reset_control_assert(host->rst); + udelay(1); + reset_control_deassert(host->rst); + + return ufshcd_runtime_resume(dev); +} +#endif + +#ifdef CONFIG_PM_SLEEP +static int ufs_rockchip_system_suspend(struct device *dev) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + struct ufs_rockchip_host *host = ufshcd_get_variant(hba); + int err; + + /* + * If spm_lvl is less than level 5, it means we need to keep the host + * controller in powered-on state. So device_set_awake_path() is + * calling pm core to notify the genpd provider to meet this requirement + */ + if (hba->spm_lvl < UFS_PM_LVL_5) + device_set_awake_path(dev); + + err = ufshcd_system_suspend(dev); + if (err) { + dev_err(hba->dev, "UFSHCD system susped failed %d\n", err); + return err; + } + + clk_disable_unprepare(host->ref_out_clk); + + return 0; +} + +static int ufs_rockchip_system_resume(struct device *dev) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + struct ufs_rockchip_host *host = ufshcd_get_variant(hba); + int err; + + err = clk_prepare_enable(host->ref_out_clk); + if (err) { + dev_err(hba->dev, "failed to enable ref_out clock %d\n", err); + return err; + } + + return ufshcd_system_resume(dev); +} +#endif + +static const struct dev_pm_ops ufs_rockchip_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(ufs_rockchip_system_suspend, ufs_rockchip_system_resume) + SET_RUNTIME_PM_OPS(ufs_rockchip_runtime_suspend, ufs_rockchip_runtime_resume, NULL) + .prepare = ufshcd_suspend_prepare, + .complete = ufshcd_resume_complete, +}; + +static struct platform_driver ufs_rockchip_pltform = { + .probe = ufs_rockchip_probe, + .remove = ufs_rockchip_remove, + .driver = { + .name = "ufshcd-rockchip", + .pm = &ufs_rockchip_pm_ops, + .of_match_table = ufs_rockchip_of_match, + }, +}; +module_platform_driver(ufs_rockchip_pltform); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Rockchip UFS Host Driver"); diff --git a/drivers/ufs/host/ufs-rockchip.h b/drivers/ufs/host/ufs-rockchip.h new file mode 100644 index 000000000000..3ba6fb9f73ae --- /dev/null +++ b/drivers/ufs/host/ufs-rockchip.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Rockchip UFS Host Controller driver + * + * Copyright (C) 2025 Rockchip Electronics Co., Ltd. + */ + +#ifndef _UFS_ROCKCHIP_H_ +#define _UFS_ROCKCHIP_H_ + +#define SEL_TX_LANE0 0x0 +#define SEL_TX_LANE1 0x1 +#define SEL_TX_LANE2 0x2 +#define SEL_TX_LANE3 0x3 +#define SEL_RX_LANE0 0x4 +#define SEL_RX_LANE1 0x5 +#define SEL_RX_LANE2 0x6 +#define SEL_RX_LANE3 0x7 + +#define VND_TX_CLK_PRD 0xAA +#define VND_TX_CLK_PRD_EN 0xA9 +#define VND_TX_LINERESET_PVALUE2 0xAB +#define VND_TX_LINERESET_PVALUE1 0xAC +#define VND_TX_LINERESET_VALUE 0xAD +#define VND_TX_BASE_NVALUE 0x93 +#define VND_TX_TASE_VALUE 0x94 +#define VND_TX_POWER_SAVING_CTRL 0x7F +#define VND_RX_CLK_PRD 0x12 +#define VND_RX_CLK_PRD_EN 0x11 +#define VND_RX_LINERESET_PVALUE2 0x1B +#define VND_RX_LINERESET_PVALUE1 0x1C +#define VND_RX_LINERESET_VALUE 0x1D +#define VND_RX_LINERESET_OPTION 0x25 +#define VND_RX_POWER_SAVING_CTRL 0x2F +#define VND_RX_SAVE_DET_CTRL 0x1E + +#define CMN_REG23 0x8C +#define CMN_REG25 0x94 +#define TRSV0_REG08 0xE0 +#define TRSV1_REG08 0x220 +#define TRSV0_REG14 0x110 +#define TRSV1_REG14 0x250 +#define TRSV0_REG15 0x134 +#define TRSV1_REG15 0x274 +#define TRSV0_REG16 0x128 +#define TRSV1_REG16 0x268 +#define TRSV0_REG17 0x12C +#define TRSV1_REG17 0x26c +#define TRSV0_REG18 0x120 +#define TRSV1_REG18 0x260 +#define TRSV0_REG29 0x164 +#define TRSV1_REG29 0x2A4 +#define TRSV0_REG2E 0x178 +#define TRSV1_REG2E 0x2B8 +#define TRSV0_REG3C 0x1B0 +#define TRSV1_REG3C 0x2F0 +#define TRSV0_REG3D 0x1B4 +#define TRSV1_REG3D 0x2F4 + +#define MPHY_CFG 0x200 +#define MPHY_CFG_ENABLE 0x40 +#define MPHY_CFG_DISABLE 0x0 + +#define MIB_T_DBG_CPORT_TX_ENDIAN 0xc022 +#define MIB_T_DBG_CPORT_RX_ENDIAN 0xc023 + +struct ufs_rockchip_host { + struct ufs_hba *hba; + void __iomem *ufs_phy_ctrl; + void __iomem *ufs_sys_ctrl; + void __iomem *mphy_base; + struct gpio_desc *rst_gpio; + struct reset_control *rst; + struct clk *ref_out_clk; + struct clk_bulk_data *clks; + uint64_t caps; +}; + +#define ufs_sys_writel(base, val, reg) \ + writel((val), (base) + (reg)) +#define ufs_sys_readl(base, reg) readl((base) + (reg)) +#define ufs_sys_set_bits(base, mask, reg) \ + ufs_sys_writel( \ + (base), ((mask) | (ufs_sys_readl((base), (reg)))), (reg)) +#define ufs_sys_ctrl_clr_bits(base, mask, reg) \ + ufs_sys_writel((base), \ + ((~(mask)) & (ufs_sys_readl((base), (reg)))), \ + (reg)) + +#endif /* _UFS_ROCKCHIP_H_ */ diff --git a/drivers/ufs/host/ufs-sprd.c b/drivers/ufs/host/ufs-sprd.c index b1d532363f9d..65bd8fb96b99 100644 --- a/drivers/ufs/host/ufs-sprd.c +++ b/drivers/ufs/host/ufs-sprd.c @@ -160,9 +160,9 @@ static int ufs_sprd_common_init(struct ufs_hba *hba) } static int sprd_ufs_pwr_change_notify(struct ufs_hba *hba, - enum ufs_notify_change_status status, - struct ufs_pa_layer_attr *dev_max_params, - struct ufs_pa_layer_attr *dev_req_params) + enum ufs_notify_change_status status, + const struct ufs_pa_layer_attr *dev_max_params, + struct ufs_pa_layer_attr *dev_req_params) { struct ufs_sprd_host *host = ufshcd_get_variant(hba); diff --git a/drivers/ufs/host/ufshcd-pci.c b/drivers/ufs/host/ufshcd-pci.c index ea39c5d5b8cf..996387906aa1 100644 --- a/drivers/ufs/host/ufshcd-pci.c +++ b/drivers/ufs/host/ufshcd-pci.c @@ -157,7 +157,7 @@ static int ufs_intel_set_lanes(struct ufs_hba *hba, u32 lanes) static int ufs_intel_lkf_pwr_change_notify(struct ufs_hba *hba, enum ufs_notify_change_status status, - struct ufs_pa_layer_attr *dev_max_params, + const struct ufs_pa_layer_attr *dev_max_params, struct ufs_pa_layer_attr *dev_req_params) { int err = 0; @@ -562,7 +562,6 @@ static void ufshcd_pci_remove(struct pci_dev *pdev) pm_runtime_forbid(&pdev->dev); pm_runtime_get_noresume(&pdev->dev); ufshcd_remove(hba); - ufshcd_dealloc_host(hba); } /** @@ -605,7 +604,6 @@ ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) err = ufshcd_init(hba, mmio_base, pdev->irq); if (err) { dev_err(&pdev->dev, "Initialization failed\n"); - ufshcd_dealloc_host(hba); return err; } diff --git a/drivers/ufs/host/ufshcd-pltfrm.c b/drivers/ufs/host/ufshcd-pltfrm.c index 505572d4fa87..ffe5d1d2b215 100644 --- a/drivers/ufs/host/ufshcd-pltfrm.c +++ b/drivers/ufs/host/ufshcd-pltfrm.c @@ -465,21 +465,17 @@ int ufshcd_pltfrm_init(struct platform_device *pdev, struct device *dev = &pdev->dev; mmio_base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(mmio_base)) { - err = PTR_ERR(mmio_base); - goto out; - } + if (IS_ERR(mmio_base)) + return PTR_ERR(mmio_base); irq = platform_get_irq(pdev, 0); - if (irq < 0) { - err = irq; - goto out; - } + if (irq < 0) + return irq; err = ufshcd_alloc_host(dev, &hba); if (err) { dev_err(dev, "Allocation failed\n"); - goto out; + return err; } hba->vops = vops; @@ -488,13 +484,13 @@ int ufshcd_pltfrm_init(struct platform_device *pdev, if (err) { dev_err(dev, "%s: clock parse failed %d\n", __func__, err); - goto dealloc_host; + return err; } err = ufshcd_parse_regulator_info(hba); if (err) { dev_err(dev, "%s: regulator init failed %d\n", __func__, err); - goto dealloc_host; + return err; } ufshcd_init_lanes_per_dir(hba); @@ -502,25 +498,20 @@ int ufshcd_pltfrm_init(struct platform_device *pdev, err = ufshcd_parse_operating_points(hba); if (err) { dev_err(dev, "%s: OPP parse failed %d\n", __func__, err); - goto dealloc_host; + return err; } err = ufshcd_init(hba, mmio_base, irq); if (err) { dev_err_probe(dev, err, "Initialization failed with error %d\n", err); - goto dealloc_host; + return err; } pm_runtime_set_active(dev); pm_runtime_enable(dev); return 0; - -dealloc_host: - ufshcd_dealloc_host(hba); -out: - return err; } EXPORT_SYMBOL_GPL(ufshcd_pltfrm_init); @@ -534,7 +525,6 @@ void ufshcd_pltfrm_remove(struct platform_device *pdev) pm_runtime_get_sync(&pdev->dev); ufshcd_remove(hba); - ufshcd_dealloc_host(hba); pm_runtime_disable(&pdev->dev); pm_runtime_put_noidle(&pdev->dev); } diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 2eae8fc2e0db..94d478b6bcd3 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -2142,8 +2142,8 @@ static int do_scsi_command(struct fsg_common *common) * of Posix locks. */ case FORMAT_UNIT: - case RELEASE: - case RESERVE: + case RELEASE_6: + case RESERVE_6: case SEND_DIAGNOSTIC: default: diff --git a/drivers/usb/storage/debug.c b/drivers/usb/storage/debug.c index 576be66ad962..dda610f689b7 100644 --- a/drivers/usb/storage/debug.c +++ b/drivers/usb/storage/debug.c @@ -58,8 +58,8 @@ void usb_stor_show_command(const struct us_data *us, struct scsi_cmnd *srb) case INQUIRY: what = "INQUIRY"; break; case RECOVER_BUFFERED_DATA: what = "RECOVER_BUFFERED_DATA"; break; case MODE_SELECT: what = "MODE_SELECT"; break; - case RESERVE: what = "RESERVE"; break; - case RELEASE: what = "RELEASE"; break; + case RESERVE_6: what = "RESERVE"; break; + case RELEASE_6: what = "RELEASE"; break; case COPY: what = "COPY"; break; case ERASE: what = "ERASE"; break; case MODE_SENSE: what = "MODE_SENSE"; break; diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index fc2ef33b67b3..3ea809990ef1 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1803,7 +1803,6 @@ struct discard_buckets_state { u64 open; u64 need_journal_commit; u64 discarded; - u64 need_journal_commit_this_dev; }; static int bch2_discard_one_bucket(struct btree_trans *trans, @@ -1827,11 +1826,11 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, goto out; } - if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, - c->journal.flushed_seq_ondisk, - pos.inode, pos.offset)) { - s->need_journal_commit++; - s->need_journal_commit_this_dev++; + u64 seq_ready = bch2_bucket_journal_seq_ready(&c->buckets_waiting_for_journal, + pos.inode, pos.offset); + if (seq_ready > c->journal.flushed_seq_ondisk) { + if (seq_ready > c->journal.flushing_seq) + s->need_journal_commit++; goto out; } @@ -1865,23 +1864,24 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, discard_locked = true; } - if (!bkey_eq(*discard_pos_done, iter.pos) && - ca->mi.discard && !c->opts.nochanges) { - /* - * This works without any other locks because this is the only - * thread that removes items from the need_discard tree - */ - bch2_trans_unlock_long(trans); - blkdev_issue_discard(ca->disk_sb.bdev, - k.k->p.offset * ca->mi.bucket_size, - ca->mi.bucket_size, - GFP_KERNEL); - *discard_pos_done = iter.pos; + if (!bkey_eq(*discard_pos_done, iter.pos)) { s->discarded++; + *discard_pos_done = iter.pos; - ret = bch2_trans_relock_notrace(trans); - if (ret) - goto out; + if (ca->mi.discard && !c->opts.nochanges) { + /* + * This works without any other locks because this is the only + * thread that removes items from the need_discard tree + */ + bch2_trans_unlock_long(trans); + blkdev_issue_discard(ca->disk_sb.bdev, + k.k->p.offset * ca->mi.bucket_size, + ca->mi.bucket_size, + GFP_KERNEL); + ret = bch2_trans_relock_notrace(trans); + if (ret) + goto out; + } } SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false); @@ -1929,6 +1929,9 @@ static void bch2_do_discards_work(struct work_struct *work) POS(ca->dev_idx, U64_MAX), 0, k, bch2_discard_one_bucket(trans, ca, &iter, &discard_pos_done, &s, false))); + if (s.need_journal_commit > dev_buckets_available(ca, BCH_WATERMARK_normal)) + bch2_journal_flush_async(&c->journal, NULL); + trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret)); @@ -2024,7 +2027,7 @@ static void bch2_do_discards_fast_work(struct work_struct *work) break; } - trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret)); + trace_discard_buckets_fast(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret)); bch2_trans_put(trans); percpu_ref_put(&ca->io_ref); diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 6df41c331a52..5a781fb4c794 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -205,8 +205,12 @@ static inline bool may_alloc_bucket(struct bch_fs *c, return false; } - if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, - c->journal.flushed_seq_ondisk, bucket.inode, bucket.offset)) { + u64 journal_seq_ready = + bch2_bucket_journal_seq_ready(&c->buckets_waiting_for_journal, + bucket.inode, bucket.offset); + if (journal_seq_ready > c->journal.flushed_seq_ondisk) { + if (journal_seq_ready > c->journal.flushing_seq) + s->need_journal_commit++; s->skipped_need_journal_commit++; return false; } @@ -570,7 +574,7 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, ? bch2_bucket_alloc_freelist(trans, ca, watermark, &s, cl) : bch2_bucket_alloc_early(trans, ca, watermark, &s, cl); - if (s.skipped_need_journal_commit * 2 > avail) + if (s.need_journal_commit * 2 > avail) bch2_journal_flush_async(&c->journal, NULL); if (!ob && s.btree_bitmap != BTREE_BITMAP_ANY) { diff --git a/fs/bcachefs/alloc_types.h b/fs/bcachefs/alloc_types.h index 9bbb28e90b93..4aa8ee026cb8 100644 --- a/fs/bcachefs/alloc_types.h +++ b/fs/bcachefs/alloc_types.h @@ -18,6 +18,7 @@ struct bucket_alloc_state { u64 buckets_seen; u64 skipped_open; u64 skipped_need_journal_commit; + u64 need_journal_commit; u64 skipped_nocow; u64 skipped_nouse; u64 skipped_mi_btree_bitmap; diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index c378b97ebeca..1821f40c161a 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -748,7 +748,6 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc) rcu_read_unlock(); mutex_lock(&bc->table.mutex); mutex_unlock(&bc->table.mutex); - rcu_read_lock(); continue; } for (i = 0; i < tbl->size; i++) diff --git a/fs/bcachefs/buckets_waiting_for_journal.c b/fs/bcachefs/buckets_waiting_for_journal.c index f9fb150eda70..c8a488e6b7b8 100644 --- a/fs/bcachefs/buckets_waiting_for_journal.c +++ b/fs/bcachefs/buckets_waiting_for_journal.c @@ -22,23 +22,21 @@ static void bucket_table_init(struct buckets_waiting_for_journal_table *t, size_ memset(t->d, 0, sizeof(t->d[0]) << t->bits); } -bool bch2_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, - u64 flushed_seq, - unsigned dev, u64 bucket) +u64 bch2_bucket_journal_seq_ready(struct buckets_waiting_for_journal *b, + unsigned dev, u64 bucket) { struct buckets_waiting_for_journal_table *t; u64 dev_bucket = (u64) dev << 56 | bucket; - bool ret = false; - unsigned i; + u64 ret = 0; mutex_lock(&b->lock); t = b->t; - for (i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) { + for (unsigned i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) { struct bucket_hashed *h = bucket_hash(t, i, dev_bucket); if (h->dev_bucket == dev_bucket) { - ret = h->journal_seq > flushed_seq; + ret = h->journal_seq; break; } } diff --git a/fs/bcachefs/buckets_waiting_for_journal.h b/fs/bcachefs/buckets_waiting_for_journal.h index d2ae19cbe18c..365619ca44c8 100644 --- a/fs/bcachefs/buckets_waiting_for_journal.h +++ b/fs/bcachefs/buckets_waiting_for_journal.h @@ -4,8 +4,8 @@ #include "buckets_waiting_for_journal_types.h" -bool bch2_bucket_needs_journal_commit(struct buckets_waiting_for_journal *, - u64, unsigned, u64); +u64 bch2_bucket_journal_seq_ready(struct buckets_waiting_for_journal *, + unsigned, u64); int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *, u64, unsigned, u64, u64); diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index d2e134528f0e..428b9be6af34 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -285,12 +285,14 @@ void bch2_inode_opts_get(struct bch_io_opts *, struct bch_fs *, struct bch_inode_unpacked *); int bch2_inum_opts_get(struct btree_trans*, subvol_inum, struct bch_io_opts *); +#include "rebalance.h" + static inline struct bch_extent_rebalance bch2_inode_rebalance_opts_get(struct bch_fs *c, struct bch_inode_unpacked *inode) { struct bch_io_opts io_opts; bch2_inode_opts_get(&io_opts, c, inode); - return io_opts_to_rebalance_opts(&io_opts); + return io_opts_to_rebalance_opts(c, &io_opts); } int bch2_inode_rm_snapshot(struct btree_trans *, u64, u32); diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index cb2c3722f674..24c294d4634e 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -319,6 +319,16 @@ void bch2_journal_halt(struct journal *j) spin_unlock(&j->lock); } +void bch2_journal_halt_locked(struct journal *j) +{ + lockdep_assert_held(&j->lock); + + __journal_entry_close(j, JOURNAL_ENTRY_ERROR_VAL, true); + if (!j->err_seq) + j->err_seq = journal_cur_seq(j); + journal_wake(j); +} + static bool journal_entry_want_write(struct journal *j) { bool ret = !journal_entry_is_open(j) || @@ -381,9 +391,12 @@ static int journal_entry_open(struct journal *j) if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf)) return JOURNAL_ERR_max_in_flight; - if (bch2_fs_fatal_err_on(journal_cur_seq(j) >= JOURNAL_SEQ_MAX, - c, "cannot start: journal seq overflow")) + if (journal_cur_seq(j) >= JOURNAL_SEQ_MAX) { + bch_err(c, "cannot start: journal seq overflow"); + if (bch2_fs_emergency_read_only_locked(c)) + bch_err(c, "fatal error - emergency read only"); return JOURNAL_ERR_insufficient_devices; /* -EROFS */ + } BUG_ON(!j->cur_entry_sectors); @@ -783,6 +796,7 @@ int bch2_journal_flush_seq_async(struct journal *j, u64 seq, } buf->must_flush = true; + j->flushing_seq = max(j->flushing_seq, seq); if (parent && !closure_wait(&buf->wait, parent)) BUG(); diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index dccddd5420ad..107f7f901cd9 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -409,6 +409,7 @@ bool bch2_journal_noflush_seq(struct journal *, u64, u64); int bch2_journal_meta(struct journal *); void bch2_journal_halt(struct journal *); +void bch2_journal_halt_locked(struct journal *); static inline int bch2_journal_error(struct journal *j) { diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h index 3ba433a48eb8..a198a81d7478 100644 --- a/fs/bcachefs/journal_types.h +++ b/fs/bcachefs/journal_types.h @@ -237,6 +237,7 @@ struct journal { /* seq, last_seq from the most recent journal entry successfully written */ u64 seq_ondisk; u64 flushed_seq_ondisk; + u64 flushing_seq; u64 last_seq_ondisk; u64 err_seq; u64 last_empty_seq; diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index a182b5d454ba..9d397fc2a1f0 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -659,18 +659,4 @@ static inline void bch2_io_opts_fixups(struct bch_io_opts *opts) struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts); bool bch2_opt_is_inode_opt(enum bch_opt_id); -/* rebalance opts: */ - -static inline struct bch_extent_rebalance io_opts_to_rebalance_opts(struct bch_io_opts *opts) -{ - return (struct bch_extent_rebalance) { - .type = BIT(BCH_EXTENT_ENTRY_rebalance), -#define x(_name) \ - ._name = opts->_name, \ - ._name##_from_inode = opts->_name##_from_inode, - BCH_REBALANCE_OPTS() -#undef x - }; -}; - #endif /* _BCACHEFS_OPTS_H */ diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 4adc74cd3f70..d0a1f5cd5c2b 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -121,12 +121,10 @@ u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) } } incompressible: - if (opts->background_target && - bch2_target_accepts_data(c, BCH_DATA_user, opts->background_target)) { + if (opts->background_target) bkey_for_each_ptr_decode(k.k, ptrs, p, entry) if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, opts->background_target)) sectors += p.crc.compressed_size; - } return sectors; } @@ -140,7 +138,7 @@ static bool bch2_bkey_rebalance_needs_update(struct bch_fs *c, struct bch_io_opt const struct bch_extent_rebalance *old = bch2_bkey_rebalance_opts(k); if (k.k->type == KEY_TYPE_reflink_v || bch2_bkey_ptrs_need_rebalance(c, opts, k)) { - struct bch_extent_rebalance new = io_opts_to_rebalance_opts(opts); + struct bch_extent_rebalance new = io_opts_to_rebalance_opts(c, opts); return old == NULL || memcmp(old, &new, sizeof(new)); } else { return old != NULL; @@ -163,7 +161,7 @@ int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_io_opts *opts, k.k->u64s += sizeof(*old) / sizeof(u64); } - *old = io_opts_to_rebalance_opts(opts); + *old = io_opts_to_rebalance_opts(c, opts); } else { if (old) extent_entry_drop(k, (union bch_extent_entry *) old); diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h index 0a0821ab895d..62a3859d3823 100644 --- a/fs/bcachefs/rebalance.h +++ b/fs/bcachefs/rebalance.h @@ -4,8 +4,28 @@ #include "compress.h" #include "disk_groups.h" +#include "opts.h" #include "rebalance_types.h" +static inline struct bch_extent_rebalance io_opts_to_rebalance_opts(struct bch_fs *c, + struct bch_io_opts *opts) +{ + struct bch_extent_rebalance r = { + .type = BIT(BCH_EXTENT_ENTRY_rebalance), +#define x(_name) \ + ._name = opts->_name, \ + ._name##_from_inode = opts->_name##_from_inode, + BCH_REBALANCE_OPTS() +#undef x + }; + + if (r.background_target && + !bch2_target_accepts_data(c, BCH_DATA_user, r.background_target)) + r.background_target = 0; + + return r; +}; + u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c); int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bch_io_opts *, struct bkey_i *); int bch2_get_update_rebalance_opts(struct btree_trans *, diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index e3d0475232e5..b7b96283c316 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -428,7 +428,7 @@ static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) bch2_bkey_get_iter_typed(trans, &snapshot_iter, BTREE_ID_snapshots, POS(0, snapid), 0, snapshot); - ret = bkey_err(subvol); + ret = bkey_err(snapshot); bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, "missing snapshot %u", snapid); if (ret) @@ -440,6 +440,11 @@ static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) bch2_bkey_get_iter_typed(trans, &snapshot_tree_iter, BTREE_ID_snapshot_trees, POS(0, treeid), 0, snapshot_tree); + ret = bkey_err(snapshot_tree); + bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, + "missing snapshot tree %u", treeid); + if (ret) + goto err; if (le32_to_cpu(snapshot_tree.v->master_subvol) == subvolid) { struct bkey_i_snapshot_tree *snapshot_tree_mut = diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index d97ea7bd1171..6d97d412fed9 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -411,6 +411,17 @@ bool bch2_fs_emergency_read_only(struct bch_fs *c) return ret; } +bool bch2_fs_emergency_read_only_locked(struct bch_fs *c) +{ + bool ret = !test_and_set_bit(BCH_FS_emergency_ro, &c->flags); + + bch2_journal_halt_locked(&c->journal); + bch2_fs_read_only_async(c); + + wake_up(&bch2_read_only_wait); + return ret; +} + static int bch2_fs_read_write_late(struct bch_fs *c) { int ret; diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h index fa6d52216510..04f8287eff5c 100644 --- a/fs/bcachefs/super.h +++ b/fs/bcachefs/super.h @@ -29,6 +29,7 @@ int bch2_dev_resize(struct bch_fs *, struct bch_dev *, u64); struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *); bool bch2_fs_emergency_read_only(struct bch_fs *); +bool bch2_fs_emergency_read_only_locked(struct bch_fs *); void bch2_fs_read_only(struct bch_fs *); int bch2_fs_read_write(struct bch_fs *); diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index 56a5a7fbc0fd..c1b51009edf6 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -727,7 +727,7 @@ DEFINE_EVENT(fs_str, bucket_alloc_fail, TP_ARGS(c, str) ); -TRACE_EVENT(discard_buckets, +DECLARE_EVENT_CLASS(discard_buckets_class, TP_PROTO(struct bch_fs *c, u64 seen, u64 open, u64 need_journal_commit, u64 discarded, const char *err), TP_ARGS(c, seen, open, need_journal_commit, discarded, err), @@ -759,6 +759,18 @@ TRACE_EVENT(discard_buckets, __entry->err) ); +DEFINE_EVENT(discard_buckets_class, discard_buckets, + TP_PROTO(struct bch_fs *c, u64 seen, u64 open, + u64 need_journal_commit, u64 discarded, const char *err), + TP_ARGS(c, seen, open, need_journal_commit, discarded, err) +); + +DEFINE_EVENT(discard_buckets_class, discard_buckets_fast, + TP_PROTO(struct bch_fs *c, u64 seen, u64 open, + u64 need_journal_commit, u64 discarded, const char *err), + TP_ARGS(c, seen, open, need_journal_commit, discarded, err) +); + TRACE_EVENT(bucket_invalidate, TP_PROTO(struct bch_fs *c, unsigned dev, u64 bucket, u32 sectors), TP_ARGS(c, dev, bucket, sectors), diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 92071ca0655f..3dc5a35dd19b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1496,6 +1496,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, if (!p->skip_locking) { btrfs_unlock_up_safe(p, parent_level + 1); + btrfs_maybe_reset_lockdep_class(root, tmp); tmp_locked = true; btrfs_tree_read_lock(tmp); btrfs_release_path(p); @@ -1539,6 +1540,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, if (!p->skip_locking) { ASSERT(ret == -EAGAIN); + btrfs_maybe_reset_lockdep_class(root, tmp); tmp_locked = true; btrfs_tree_read_lock(tmp); btrfs_release_path(p); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 30eceaf829a7..4aca7475fd82 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -1229,6 +1229,18 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent( */ if (WARN_ON_ONCE(len >= ordered->num_bytes)) return ERR_PTR(-EINVAL); + /* + * If our ordered extent had an error there's no point in continuing. + * The error may have come from a transaction abort done either by this + * task or some other concurrent task, and the transaction abort path + * iterates over all existing ordered extents and sets the flag + * BTRFS_ORDERED_IOERR on them. + */ + if (unlikely(flags & (1U << BTRFS_ORDERED_IOERR))) { + const int fs_error = BTRFS_FS_ERROR(fs_info); + + return fs_error ? ERR_PTR(fs_error) : ERR_PTR(-EIO); + } /* We cannot split partially completed ordered extents. */ if (ordered->bytes_left) { ASSERT(!(flags & ~BTRFS_ORDERED_TYPE_FLAGS)); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index b90fabe302e6..f9d3766c809b 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1880,11 +1880,7 @@ int btrfs_qgroup_cleanup_dropped_subvolume(struct btrfs_fs_info *fs_info, u64 su * Commit current transaction to make sure all the rfer/excl numbers * get updated. */ - trans = btrfs_start_transaction(fs_info->quota_root, 0); - if (IS_ERR(trans)) - return PTR_ERR(trans); - - ret = btrfs_commit_transaction(trans); + ret = btrfs_commit_current_transaction(fs_info->quota_root); if (ret < 0) return ret; @@ -1897,8 +1893,11 @@ int btrfs_qgroup_cleanup_dropped_subvolume(struct btrfs_fs_info *fs_info, u64 su /* * It's squota and the subvolume still has numbers needed for future * accounting, in this case we can not delete it. Just skip it. + * + * Or the qgroup is already removed by a qgroup rescan. For both cases we're + * safe to ignore them. */ - if (ret == -EBUSY) + if (ret == -EBUSY || ret == -ENOENT) ret = 0; return ret; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 15312013f2a3..aca83a98b75a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -274,8 +274,10 @@ static noinline int join_transaction(struct btrfs_fs_info *fs_info, cur_trans = fs_info->running_transaction; if (cur_trans) { if (TRANS_ABORTED(cur_trans)) { + const int abort_error = cur_trans->aborted; + spin_unlock(&fs_info->trans_lock); - return cur_trans->aborted; + return abort_error; } if (btrfs_blocked_trans_types[cur_trans->state] & type) { spin_unlock(&fs_info->trans_lock); diff --git a/fs/dcache.c b/fs/dcache.c index 9cc0d47da321..e3634916ffb9 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1700,7 +1700,7 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) smp_store_release(&dentry->d_name.name, dname); /* ^^^ */ dentry->d_flags = 0; - lockref_init(&dentry->d_lockref, 1); + lockref_init(&dentry->d_lockref); seqcount_spinlock_init(&dentry->d_seq, &dentry->d_lock); dentry->d_inode = NULL; dentry->d_parent = dentry; @@ -2966,11 +2966,11 @@ static int __d_unalias(struct dentry *dentry, struct dentry *alias) goto out_err; m2 = &alias->d_parent->d_inode->i_rwsem; out_unalias: - if (alias->d_op->d_unalias_trylock && + if (alias->d_op && alias->d_op->d_unalias_trylock && !alias->d_op->d_unalias_trylock(alias)) goto out_err; __d_move(alias, dentry, false); - if (alias->d_op->d_unalias_unlock) + if (alias->d_op && alias->d_op->d_unalias_unlock) alias->d_op->d_unalias_unlock(alias); ret = 0; out_err: diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 29f8963bb523..d771e06db738 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -726,7 +726,7 @@ static int z_erofs_register_pcluster(struct z_erofs_frontend *fe) if (IS_ERR(pcl)) return PTR_ERR(pcl); - lockref_init(&pcl->lockref, 1); /* one ref for this request */ + lockref_init(&pcl->lockref); /* one ref for this request */ pcl->algorithmformat = map->m_algorithmformat; pcl->length = 0; pcl->partial = true; diff --git a/fs/file_table.c b/fs/file_table.c index f0291a66f9db..5c00dc38558d 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -194,6 +194,11 @@ static int init_file(struct file *f, int flags, const struct cred *cred) * refcount bumps we should reinitialize the reused file first. */ file_ref_init(&f->f_ref, 1); + /* + * Disable permission and pre-content events for all files by default. + * They may be enabled later by file_set_fsnotify_mode_from_watchers(). + */ + file_set_fsnotify_mode(f, FMODE_NONOTIFY_PERM); return 0; } @@ -375,7 +380,13 @@ struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt, if (IS_ERR(file)) { ihold(inode); path_put(&path); + return file; } + /* + * Disable all fsnotify events for pseudo files by default. + * They may be enabled by caller with file_set_fsnotify_mode(). + */ + file_set_fsnotify_mode(file, FMODE_NONOTIFY); return file; } EXPORT_SYMBOL(alloc_file_pseudo); @@ -400,6 +411,11 @@ struct file *alloc_file_pseudo_noaccount(struct inode *inode, return file; } file_init_path(file, &path, fops); + /* + * Disable all fsnotify events for pseudo files by default. + * They may be enabled by caller with file_set_fsnotify_mode(). + */ + file_set_fsnotify_mode(file, FMODE_NONOTIFY); return file; } EXPORT_SYMBOL_GPL(alloc_file_pseudo_noaccount); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 8c4c1f871a88..65c07aa95718 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1201,8 +1201,8 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, if (glops->go_instantiate) gl->gl_flags |= BIT(GLF_INSTANTIATE_NEEDED); gl->gl_name = name; + lockref_init(&gl->gl_lockref); lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass); - gl->gl_lockref.count = 1; gl->gl_state = LM_ST_UNLOCKED; gl->gl_target = LM_ST_UNLOCKED; gl->gl_demote_state = LM_ST_EXCLUSIVE; diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 04cadc02e5a6..0727f60ad028 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -51,7 +51,6 @@ static void gfs2_init_glock_once(void *foo) { struct gfs2_glock *gl = foo; - spin_lock_init(&gl->gl_lockref.lock); INIT_LIST_HEAD(&gl->gl_holders); INIT_LIST_HEAD(&gl->gl_lru); INIT_LIST_HEAD(&gl->gl_ail_list); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 58bc5013ca49..2298e06797ac 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -236,7 +236,7 @@ static struct gfs2_quota_data *qd_alloc(unsigned hash, struct gfs2_sbd *sdp, str return NULL; qd->qd_sbd = sdp; - lockref_init(&qd->qd_lockref, 0); + lockref_init(&qd->qd_lockref); qd->qd_id = qid; qd->qd_slot = -1; INIT_LIST_HEAD(&qd->qd_lru); @@ -297,7 +297,6 @@ static int qd_get(struct gfs2_sbd *sdp, struct kqid qid, spin_lock_bucket(hash); *qdp = qd = gfs2_qd_search_bucket(hash, sdp, qid); if (qd == NULL) { - new_qd->qd_lockref.count++; *qdp = new_qd; list_add(&new_qd->qd_list, &sdp->sd_quota_list); hlist_bl_add_head_rcu(&new_qd->qd_hlist, &qd_hash_table[hash]); @@ -1450,6 +1449,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) if (qd == NULL) goto fail_brelse; + qd->qd_lockref.count = 0; set_bit(QDF_CHANGE, &qd->qd_flags); qd->qd_change = qc_change; qd->qd_slot = slot; diff --git a/fs/namespace.c b/fs/namespace.c index a3ed3f2980cb..8f1000f9f3df 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -5087,30 +5087,29 @@ static int statmount_mnt_opts(struct kstatmount *s, struct seq_file *seq) { struct vfsmount *mnt = s->mnt; struct super_block *sb = mnt->mnt_sb; + size_t start = seq->count; int err; - if (sb->s_op->show_options) { - size_t start = seq->count; - - err = security_sb_show_options(seq, sb); - if (err) - return err; + err = security_sb_show_options(seq, sb); + if (err) + return err; + if (sb->s_op->show_options) { err = sb->s_op->show_options(seq, mnt->mnt_root); if (err) return err; + } - if (unlikely(seq_has_overflowed(seq))) - return -EAGAIN; + if (unlikely(seq_has_overflowed(seq))) + return -EAGAIN; - if (seq->count == start) - return 0; + if (seq->count == start) + return 0; - /* skip leading comma */ - memmove(seq->buf + start, seq->buf + start + 1, - seq->count - start - 1); - seq->count--; - } + /* skip leading comma */ + memmove(seq->buf + start, seq->buf + start + 1, + seq->count - start - 1); + seq->count--; return 0; } @@ -5191,39 +5190,45 @@ static int statmount_string(struct kstatmount *s, u64 flag) size_t kbufsize; struct seq_file *seq = &s->seq; struct statmount *sm = &s->sm; - u32 start = seq->count; + u32 start, *offp; + + /* Reserve an empty string at the beginning for any unset offsets */ + if (!seq->count) + seq_putc(seq, 0); + + start = seq->count; switch (flag) { case STATMOUNT_FS_TYPE: - sm->fs_type = start; + offp = &sm->fs_type; ret = statmount_fs_type(s, seq); break; case STATMOUNT_MNT_ROOT: - sm->mnt_root = start; + offp = &sm->mnt_root; ret = statmount_mnt_root(s, seq); break; case STATMOUNT_MNT_POINT: - sm->mnt_point = start; + offp = &sm->mnt_point; ret = statmount_mnt_point(s, seq); break; case STATMOUNT_MNT_OPTS: - sm->mnt_opts = start; + offp = &sm->mnt_opts; ret = statmount_mnt_opts(s, seq); break; case STATMOUNT_OPT_ARRAY: - sm->opt_array = start; + offp = &sm->opt_array; ret = statmount_opt_array(s, seq); break; case STATMOUNT_OPT_SEC_ARRAY: - sm->opt_sec_array = start; + offp = &sm->opt_sec_array; ret = statmount_opt_sec_array(s, seq); break; case STATMOUNT_FS_SUBTYPE: - sm->fs_subtype = start; + offp = &sm->fs_subtype; statmount_fs_subtype(s, seq); break; case STATMOUNT_SB_SOURCE: - sm->sb_source = start; + offp = &sm->sb_source; ret = statmount_sb_source(s, seq); break; default: @@ -5251,6 +5256,7 @@ static int statmount_string(struct kstatmount *s, u64 flag) seq->buf[seq->count++] = '\0'; sm->mask |= flag; + *offp = start; return 0; } diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 8ee495a58d0a..fae1b6d397ea 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -648,7 +648,7 @@ EXPORT_SYMBOL_GPL(fsnotify); * Later, fsnotify permission hooks do not check if there are permission event * watches, but that there were permission event watches at open time. */ -void file_set_fsnotify_mode(struct file *file) +void file_set_fsnotify_mode_from_watchers(struct file *file) { struct dentry *dentry = file->f_path.dentry, *parent; struct super_block *sb = dentry->d_sb; @@ -665,7 +665,7 @@ void file_set_fsnotify_mode(struct file *file) */ if (likely(!fsnotify_sb_has_priority_watchers(sb, FSNOTIFY_PRIO_CONTENT))) { - file->f_mode |= FMODE_NONOTIFY_PERM; + file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM); return; } @@ -676,7 +676,7 @@ void file_set_fsnotify_mode(struct file *file) if ((!d_is_dir(dentry) && !d_is_reg(dentry)) || likely(!fsnotify_sb_has_priority_watchers(sb, FSNOTIFY_PRIO_PRE_CONTENT))) { - file->f_mode |= FMODE_NONOTIFY | FMODE_NONOTIFY_PERM; + file_set_fsnotify_mode(file, FMODE_NONOTIFY | FMODE_NONOTIFY_PERM); return; } @@ -686,19 +686,25 @@ void file_set_fsnotify_mode(struct file *file) */ mnt_mask = READ_ONCE(real_mount(file->f_path.mnt)->mnt_fsnotify_mask); if (unlikely(fsnotify_object_watched(d_inode(dentry), mnt_mask, - FSNOTIFY_PRE_CONTENT_EVENTS))) + FSNOTIFY_PRE_CONTENT_EVENTS))) { + /* Enable pre-content events */ + file_set_fsnotify_mode(file, 0); return; + } /* Is parent watching for pre-content events on this file? */ if (dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED) { parent = dget_parent(dentry); p_mask = fsnotify_inode_watches_children(d_inode(parent)); dput(parent); - if (p_mask & FSNOTIFY_PRE_CONTENT_EVENTS) + if (p_mask & FSNOTIFY_PRE_CONTENT_EVENTS) { + /* Enable pre-content events */ + file_set_fsnotify_mode(file, 0); return; + } } /* Nobody watching for pre-content events from this file */ - file->f_mode |= FMODE_NONOTIFY | FMODE_NONOTIFY_PERM; + file_set_fsnotify_mode(file, FMODE_NONOTIFY | FMODE_NONOTIFY_PERM); } #endif diff --git a/fs/open.c b/fs/open.c index 932e5a6de63b..1be20de9f283 100644 --- a/fs/open.c +++ b/fs/open.c @@ -905,7 +905,8 @@ static int do_dentry_open(struct file *f, f->f_sb_err = file_sample_sb_err(f); if (unlikely(f->f_flags & O_PATH)) { - f->f_mode = FMODE_PATH | FMODE_OPENED | FMODE_NONOTIFY; + f->f_mode = FMODE_PATH | FMODE_OPENED; + file_set_fsnotify_mode(f, FMODE_NONOTIFY); f->f_op = &empty_fops; return 0; } @@ -935,10 +936,10 @@ static int do_dentry_open(struct file *f, /* * Set FMODE_NONOTIFY_* bits according to existing permission watches. - * If FMODE_NONOTIFY was already set for an fanotify fd, this doesn't - * change anything. + * If FMODE_NONOTIFY mode was already set for an fanotify fd or for a + * pseudo file, this call will not change the mode. */ - file_set_fsnotify_mode(f); + file_set_fsnotify_mode_from_watchers(f); error = fsnotify_open_perm(f); if (error) goto cleanup_all; @@ -1122,7 +1123,7 @@ struct file *dentry_open_nonotify(const struct path *path, int flags, if (!IS_ERR(f)) { int error; - f->f_mode |= FMODE_NONOTIFY; + file_set_fsnotify_mode(f, FMODE_NONOTIFY); error = vfs_open(path, f); if (error) { fput(f); diff --git a/fs/pidfs.c b/fs/pidfs.c index 049352f973de..63f9699ebac3 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -287,7 +287,6 @@ static bool pidfs_ioctl_valid(unsigned int cmd) switch (cmd) { case FS_IOC_GETVERSION: case PIDFD_GET_CGROUP_NAMESPACE: - case PIDFD_GET_INFO: case PIDFD_GET_IPC_NAMESPACE: case PIDFD_GET_MNT_NAMESPACE: case PIDFD_GET_NET_NAMESPACE: @@ -300,6 +299,17 @@ static bool pidfs_ioctl_valid(unsigned int cmd) return true; } + /* Extensible ioctls require some more careful checks. */ + switch (_IOC_NR(cmd)) { + case _IOC_NR(PIDFD_GET_INFO): + /* + * Try to prevent performing a pidfd ioctl when someone + * erronously mistook the file descriptor for a pidfd. + * This is not perfect but will catch most cases. + */ + return (_IOC_TYPE(cmd) == _IOC_TYPE(PIDFD_GET_INFO)); + } + return false; } diff --git a/fs/pipe.c b/fs/pipe.c index 94b59045ab44..ce1af7592780 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -960,6 +960,12 @@ int create_pipe_files(struct file **res, int flags) res[1] = f; stream_open(inode, res[0]); stream_open(inode, res[1]); + /* + * Disable permission and pre-content events, but enable legacy + * inotify events for legacy users. + */ + file_set_fsnotify_mode(res[0], FMODE_NONOTIFY_PERM); + file_set_fsnotify_mode(res[1], FMODE_NONOTIFY_PERM); return 0; } diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index a68434ad744a..ac1f890a0d54 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -357,7 +357,7 @@ struct smb_version_operations { int (*handle_cancelled_mid)(struct mid_q_entry *, struct TCP_Server_Info *); void (*downgrade_oplock)(struct TCP_Server_Info *server, struct cifsInodeInfo *cinode, __u32 oplock, - unsigned int epoch, bool *purge_cache); + __u16 epoch, bool *purge_cache); /* process transaction2 response */ bool (*check_trans2)(struct mid_q_entry *, struct TCP_Server_Info *, char *, int); @@ -552,12 +552,12 @@ struct smb_version_operations { /* if we can do cache read operations */ bool (*is_read_op)(__u32); /* set oplock level for the inode */ - void (*set_oplock_level)(struct cifsInodeInfo *, __u32, unsigned int, - bool *); + void (*set_oplock_level)(struct cifsInodeInfo *cinode, __u32 oplock, __u16 epoch, + bool *purge_cache); /* create lease context buffer for CREATE request */ char * (*create_lease_buf)(u8 *lease_key, u8 oplock); /* parse lease context buffer and return oplock/epoch info */ - __u8 (*parse_lease_buf)(void *buf, unsigned int *epoch, char *lkey); + __u8 (*parse_lease_buf)(void *buf, __u16 *epoch, char *lkey); ssize_t (*copychunk_range)(const unsigned int, struct cifsFileInfo *src_file, struct cifsFileInfo *target_file, @@ -1447,7 +1447,7 @@ struct cifs_fid { __u8 create_guid[16]; __u32 access; struct cifs_pending_open *pending_open; - unsigned int epoch; + __u16 epoch; #ifdef CONFIG_CIFS_DEBUG2 __u64 mid; #endif /* CIFS_DEBUG2 */ @@ -1480,7 +1480,7 @@ struct cifsFileInfo { bool oplock_break_cancelled:1; bool status_file_deleted:1; /* file has been deleted */ bool offload:1; /* offload final part of _put to a wq */ - unsigned int oplock_epoch; /* epoch from the lease break */ + __u16 oplock_epoch; /* epoch from the lease break */ __u32 oplock_level; /* oplock/lease level from the lease break */ int count; spinlock_t file_info_lock; /* protects four flag/count fields above */ @@ -1577,7 +1577,7 @@ struct cifsInodeInfo { spinlock_t open_file_lock; /* protects openFileList */ __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ unsigned int oplock; /* oplock/lease level we have */ - unsigned int epoch; /* used to track lease state changes */ + __u16 epoch; /* used to track lease state changes */ #define CIFS_INODE_PENDING_OPLOCK_BREAK (0) /* oplock break in progress */ #define CIFS_INODE_PENDING_WRITERS (1) /* Writes in progress */ #define CIFS_INODE_FLAG_UNUSED (2) /* Unused flag */ diff --git a/fs/smb/client/dfs.c b/fs/smb/client/dfs.c index dad521336b5e..f65a8a90ba27 100644 --- a/fs/smb/client/dfs.c +++ b/fs/smb/client/dfs.c @@ -150,25 +150,27 @@ static int __dfs_referral_walk(struct dfs_ref_walk *rw) if (rc) continue; - if (tgt.flags & DFSREF_STORAGE_SERVER) { - rc = cifs_mount_get_tcon(mnt_ctx); - if (!rc) - rc = cifs_is_path_remote(mnt_ctx); + rc = cifs_mount_get_tcon(mnt_ctx); + if (rc) { + if (tgt.server_type == DFS_TYPE_LINK && + DFS_INTERLINK(tgt.flags)) + rc = -EREMOTE; + } else { + rc = cifs_is_path_remote(mnt_ctx); if (!rc) { ref_walk_set_tgt_hint(rw); break; } - if (rc != -EREMOTE) - continue; } - - rc = ref_walk_advance(rw); - if (!rc) { - rc = setup_dfs_ref(&tgt, rw); - if (rc) - break; - ref_walk_mark_end(rw); - goto again; + if (rc == -EREMOTE) { + rc = ref_walk_advance(rw); + if (!rc) { + rc = setup_dfs_ref(&tgt, rw); + if (rc) + break; + ref_walk_mark_end(rw); + goto again; + } } } } while (rc && ref_walk_descend(rw)); diff --git a/fs/smb/client/dfs.h b/fs/smb/client/dfs.h index ed4cd7cf1ec6..e60f0a24a8a1 100644 --- a/fs/smb/client/dfs.h +++ b/fs/smb/client/dfs.h @@ -188,4 +188,11 @@ static inline void dfs_put_root_smb_sessions(struct list_head *head) } } +static inline const char *dfs_ses_refpath(struct cifs_ses *ses) +{ + const char *path = ses->server->leaf_fullpath; + + return path ? path + 1 : ERR_PTR(-ENOENT); +} + #endif /* _CIFS_DFS_H */ diff --git a/fs/smb/client/dfs_cache.c b/fs/smb/client/dfs_cache.c index 5022bb1f122a..4dada26d56b5 100644 --- a/fs/smb/client/dfs_cache.c +++ b/fs/smb/client/dfs_cache.c @@ -1136,33 +1136,19 @@ static bool is_ses_good(struct cifs_ses *ses) return ret; } -static char *get_ses_refpath(struct cifs_ses *ses) -{ - struct TCP_Server_Info *server = ses->server; - char *path = ERR_PTR(-ENOENT); - - if (server->leaf_fullpath) { - path = kstrdup(server->leaf_fullpath + 1, GFP_KERNEL); - if (!path) - path = ERR_PTR(-ENOMEM); - } - return path; -} - /* Refresh dfs referral of @ses */ static void refresh_ses_referral(struct cifs_ses *ses) { struct cache_entry *ce; unsigned int xid; - char *path; + const char *path; int rc = 0; xid = get_xid(); - path = get_ses_refpath(ses); + path = dfs_ses_refpath(ses); if (IS_ERR(path)) { rc = PTR_ERR(path); - path = NULL; goto out; } @@ -1181,7 +1167,6 @@ static void refresh_ses_referral(struct cifs_ses *ses) out: free_xid(xid); - kfree(path); } static int __refresh_tcon_referral(struct cifs_tcon *tcon, @@ -1231,19 +1216,18 @@ static void refresh_tcon_referral(struct cifs_tcon *tcon, bool force_refresh) struct dfs_info3_param *refs = NULL; struct cache_entry *ce; struct cifs_ses *ses; - unsigned int xid; bool needs_refresh; - char *path; + const char *path; + unsigned int xid; int numrefs = 0; int rc = 0; xid = get_xid(); ses = tcon->ses; - path = get_ses_refpath(ses); + path = dfs_ses_refpath(ses); if (IS_ERR(path)) { rc = PTR_ERR(path); - path = NULL; goto out; } @@ -1271,7 +1255,6 @@ static void refresh_tcon_referral(struct cifs_tcon *tcon, bool force_refresh) out: free_xid(xid); - kfree(path); free_dfs_info_array(refs, numrefs); } diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 9756b876a75e..d6e2fb669c40 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -377,7 +377,7 @@ coalesce_t2(char *second_buf, struct smb_hdr *target_hdr) static void cifs_downgrade_oplock(struct TCP_Server_Info *server, struct cifsInodeInfo *cinode, __u32 oplock, - unsigned int epoch, bool *purge_cache) + __u16 epoch, bool *purge_cache) { cifs_set_oplock_level(cinode, oplock); } diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 77309217dab4..ec36bed54b0b 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -3904,22 +3904,22 @@ static long smb3_fallocate(struct file *file, struct cifs_tcon *tcon, int mode, static void smb2_downgrade_oplock(struct TCP_Server_Info *server, struct cifsInodeInfo *cinode, __u32 oplock, - unsigned int epoch, bool *purge_cache) + __u16 epoch, bool *purge_cache) { server->ops->set_oplock_level(cinode, oplock, 0, NULL); } static void smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, - unsigned int epoch, bool *purge_cache); + __u16 epoch, bool *purge_cache); static void smb3_downgrade_oplock(struct TCP_Server_Info *server, struct cifsInodeInfo *cinode, __u32 oplock, - unsigned int epoch, bool *purge_cache) + __u16 epoch, bool *purge_cache) { unsigned int old_state = cinode->oplock; - unsigned int old_epoch = cinode->epoch; + __u16 old_epoch = cinode->epoch; unsigned int new_state; if (epoch > old_epoch) { @@ -3939,7 +3939,7 @@ smb3_downgrade_oplock(struct TCP_Server_Info *server, static void smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, - unsigned int epoch, bool *purge_cache) + __u16 epoch, bool *purge_cache) { oplock &= 0xFF; cinode->lease_granted = false; @@ -3963,7 +3963,7 @@ smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, static void smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, - unsigned int epoch, bool *purge_cache) + __u16 epoch, bool *purge_cache) { char message[5] = {0}; unsigned int new_oplock = 0; @@ -4000,7 +4000,7 @@ smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, static void smb3_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, - unsigned int epoch, bool *purge_cache) + __u16 epoch, bool *purge_cache) { unsigned int old_oplock = cinode->oplock; @@ -4114,7 +4114,7 @@ smb3_create_lease_buf(u8 *lease_key, u8 oplock) } static __u8 -smb2_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key) +smb2_parse_lease_buf(void *buf, __u16 *epoch, char *lease_key) { struct create_lease *lc = (struct create_lease *)buf; @@ -4125,7 +4125,7 @@ smb2_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key) } static __u8 -smb3_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key) +smb3_parse_lease_buf(void *buf, __u16 *epoch, char *lease_key) { struct create_lease_v2 *lc = (struct create_lease_v2 *)buf; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 40ad9e79437a..ed7812247ebc 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -2169,7 +2169,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, tcon_error_exit: if (rsp && rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) - cifs_tcon_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree); + cifs_dbg(VFS | ONCE, "BAD_NETWORK_NAME: %s\n", tree); goto tcon_exit; } @@ -2329,7 +2329,7 @@ parse_posix_ctxt(struct create_context *cc, struct smb2_file_all_info *info, int smb2_parse_contexts(struct TCP_Server_Info *server, struct kvec *rsp_iov, - unsigned int *epoch, + __u16 *epoch, char *lease_key, __u8 *oplock, struct smb2_file_all_info *buf, struct create_posix_rsp *posix) diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index 2336dfb23f36..4662c7e2d259 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -283,7 +283,7 @@ extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *, enum securityEnum); int smb2_parse_contexts(struct TCP_Server_Info *server, struct kvec *rsp_iov, - unsigned int *epoch, + __u16 *epoch, char *lease_key, __u8 *oplock, struct smb2_file_all_info *buf, struct create_posix_rsp *posix); diff --git a/fs/stat.c b/fs/stat.c index 2c0e111a098a..f13308bfdc98 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -281,6 +281,8 @@ static int vfs_statx_path(struct path *path, int flags, struct kstat *stat, u32 request_mask) { int error = vfs_getattr(path, stat, request_mask, flags); + if (error) + return error; if (request_mask & STATX_MNT_ID_UNIQUE) { stat->mnt_id = real_mount(path->mnt)->mnt_id_unique; @@ -302,7 +304,7 @@ static int vfs_statx_path(struct path *path, int flags, struct kstat *stat, if (S_ISBLK(stat->mode)) bdev_statx(path, stat, request_mask); - return error; + return 0; } static int vfs_statx_fd(int fd, int flags, struct kstat *stat, diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c index e95b8a48d8a0..1d94bb784108 100644 --- a/fs/vboxsf/super.c +++ b/fs/vboxsf/super.c @@ -21,7 +21,8 @@ #define VBOXSF_SUPER_MAGIC 0x786f4256 /* 'VBox' little endian */ -static const unsigned char VBSF_MOUNT_SIGNATURE[4] = "\000\377\376\375"; +static const unsigned char VBSF_MOUNT_SIGNATURE[4] = { '\000', '\377', '\376', + '\375' }; static int follow_symlinks; module_param(follow_symlinks, int, 0444); diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 40ad22fb808b..0ef19f1469ec 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3563,12 +3563,12 @@ xfs_bmap_btalloc_at_eof( int error; /* - * If there are already extents in the file, try an exact EOF block - * allocation to extend the file as a contiguous extent. If that fails, - * or it's the first allocation in a file, just try for a stripe aligned - * allocation. + * If there are already extents in the file, and xfs_bmap_adjacent() has + * given a better blkno, try an exact EOF block allocation to extend the + * file as a contiguous extent. If that fails, or it's the first + * allocation in a file, just try for a stripe aligned allocation. */ - if (ap->offset) { + if (ap->eof) { xfs_extlen_t nextminlen = 0; /* @@ -3736,7 +3736,8 @@ xfs_bmap_btalloc_best_length( int error; ap->blkno = XFS_INO_TO_FSB(args->mp, ap->ip->i_ino); - xfs_bmap_adjacent(ap); + if (!xfs_bmap_adjacent(ap)) + ap->eof = false; /* * Search for an allocation group with a single extent large enough for diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index d1d4a0a22e13..15bb790359f8 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -41,8 +41,7 @@ struct kmem_cache *xfs_buf_cache; * * xfs_buf_rele: * b_lock - * pag_buf_lock - * lru_lock + * lru_lock * * xfs_buftarg_drain_rele * lru_lock @@ -220,23 +219,25 @@ _xfs_buf_alloc( */ flags &= ~(XBF_UNMAPPED | XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD); - spin_lock_init(&bp->b_lock); + /* + * A new buffer is held and locked by the owner. This ensures that the + * buffer is owned by the caller and racing RCU lookups right after + * inserting into the hash table are safe (and will have to wait for + * the unlock to do anything non-trivial). + */ bp->b_hold = 1; + sema_init(&bp->b_sema, 0); /* held, no waiters */ + + spin_lock_init(&bp->b_lock); atomic_set(&bp->b_lru_ref, 1); init_completion(&bp->b_iowait); INIT_LIST_HEAD(&bp->b_lru); INIT_LIST_HEAD(&bp->b_list); INIT_LIST_HEAD(&bp->b_li_list); - sema_init(&bp->b_sema, 0); /* held, no waiters */ bp->b_target = target; bp->b_mount = target->bt_mount; bp->b_flags = flags; - /* - * Set length and io_length to the same value initially. - * I/O routines should use io_length, which will be the same in - * most cases but may be reset (e.g. XFS recovery). - */ error = xfs_buf_get_maps(bp, nmaps); if (error) { kmem_cache_free(xfs_buf_cache, bp); @@ -502,7 +503,6 @@ int xfs_buf_cache_init( struct xfs_buf_cache *bch) { - spin_lock_init(&bch->bc_lock); return rhashtable_init(&bch->bc_hash, &xfs_buf_hash_params); } @@ -652,17 +652,20 @@ xfs_buf_find_insert( if (error) goto out_free_buf; - spin_lock(&bch->bc_lock); + /* The new buffer keeps the perag reference until it is freed. */ + new_bp->b_pag = pag; + + rcu_read_lock(); bp = rhashtable_lookup_get_insert_fast(&bch->bc_hash, &new_bp->b_rhash_head, xfs_buf_hash_params); if (IS_ERR(bp)) { + rcu_read_unlock(); error = PTR_ERR(bp); - spin_unlock(&bch->bc_lock); goto out_free_buf; } if (bp && xfs_buf_try_hold(bp)) { /* found an existing buffer */ - spin_unlock(&bch->bc_lock); + rcu_read_unlock(); error = xfs_buf_find_lock(bp, flags); if (error) xfs_buf_rele(bp); @@ -670,10 +673,8 @@ xfs_buf_find_insert( *bpp = bp; goto out_free_buf; } + rcu_read_unlock(); - /* The new buffer keeps the perag reference until it is freed. */ - new_bp->b_pag = pag; - spin_unlock(&bch->bc_lock); *bpp = new_bp; return 0; @@ -1090,7 +1091,6 @@ xfs_buf_rele_cached( } /* we are asked to drop the last reference */ - spin_lock(&bch->bc_lock); __xfs_buf_ioacct_dec(bp); if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { /* @@ -1102,7 +1102,6 @@ xfs_buf_rele_cached( bp->b_state &= ~XFS_BSTATE_DISPOSE; else bp->b_hold--; - spin_unlock(&bch->bc_lock); } else { bp->b_hold--; /* @@ -1120,7 +1119,6 @@ xfs_buf_rele_cached( ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); rhashtable_remove_fast(&bch->bc_hash, &bp->b_rhash_head, xfs_buf_hash_params); - spin_unlock(&bch->bc_lock); if (pag) xfs_perag_put(pag); freebuf = true; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 7e73663c5d4a..3b4ed42e11c0 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -80,7 +80,6 @@ typedef unsigned int xfs_buf_flags_t; #define XFS_BSTATE_IN_FLIGHT (1 << 1) /* I/O in flight */ struct xfs_buf_cache { - spinlock_t bc_lock; struct rhashtable bc_hash; }; diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c index f340a2015c4c..0b41bdfecdfb 100644 --- a/fs/xfs/xfs_exchrange.c +++ b/fs/xfs/xfs_exchrange.c @@ -329,22 +329,6 @@ xfs_exchrange_mappings( * successfully but before locks are dropped. */ -/* Verify that we have security clearance to perform this operation. */ -static int -xfs_exchange_range_verify_area( - struct xfs_exchrange *fxr) -{ - int ret; - - ret = remap_verify_area(fxr->file1, fxr->file1_offset, fxr->length, - true); - if (ret) - return ret; - - return remap_verify_area(fxr->file2, fxr->file2_offset, fxr->length, - true); -} - /* * Performs necessary checks before doing a range exchange, having stabilized * mutable inode attributes via i_rwsem. @@ -355,11 +339,13 @@ xfs_exchange_range_checks( unsigned int alloc_unit) { struct inode *inode1 = file_inode(fxr->file1); + loff_t size1 = i_size_read(inode1); struct inode *inode2 = file_inode(fxr->file2); + loff_t size2 = i_size_read(inode2); uint64_t allocmask = alloc_unit - 1; int64_t test_len; uint64_t blen; - loff_t size1, size2, tmp; + loff_t tmp; int error; /* Don't touch certain kinds of inodes */ @@ -368,24 +354,25 @@ xfs_exchange_range_checks( if (IS_SWAPFILE(inode1) || IS_SWAPFILE(inode2)) return -ETXTBSY; - size1 = i_size_read(inode1); - size2 = i_size_read(inode2); - /* Ranges cannot start after EOF. */ if (fxr->file1_offset > size1 || fxr->file2_offset > size2) return -EINVAL; - /* - * If the caller said to exchange to EOF, we set the length of the - * request large enough to cover everything to the end of both files. - */ if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) { + /* + * If the caller said to exchange to EOF, we set the length of + * the request large enough to cover everything to the end of + * both files. + */ fxr->length = max_t(int64_t, size1 - fxr->file1_offset, size2 - fxr->file2_offset); - - error = xfs_exchange_range_verify_area(fxr); - if (error) - return error; + } else { + /* + * Otherwise we require both ranges to end within EOF. + */ + if (fxr->file1_offset + fxr->length > size1 || + fxr->file2_offset + fxr->length > size2) + return -EINVAL; } /* @@ -401,15 +388,6 @@ xfs_exchange_range_checks( check_add_overflow(fxr->file2_offset, fxr->length, &tmp)) return -EINVAL; - /* - * We require both ranges to end within EOF, unless we're exchanging - * to EOF. - */ - if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) && - (fxr->file1_offset + fxr->length > size1 || - fxr->file2_offset + fxr->length > size2)) - return -EINVAL; - /* * Make sure we don't hit any file size limits. If we hit any size * limits such that test_length was adjusted, we abort the whole @@ -747,6 +725,7 @@ xfs_exchange_range( { struct inode *inode1 = file_inode(fxr->file1); struct inode *inode2 = file_inode(fxr->file2); + loff_t check_len = fxr->length; int ret; BUILD_BUG_ON(XFS_EXCHANGE_RANGE_ALL_FLAGS & @@ -779,14 +758,18 @@ xfs_exchange_range( return -EBADF; /* - * If we're not exchanging to EOF, we can check the areas before - * stabilizing both files' i_size. + * If we're exchanging to EOF we can't calculate the length until taking + * the iolock. Pass a 0 length to remap_verify_area similar to the + * FICLONE and FICLONERANGE ioctls that support cloning to EOF as well. */ - if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)) { - ret = xfs_exchange_range_verify_area(fxr); - if (ret) - return ret; - } + if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) + check_len = 0; + ret = remap_verify_area(fxr->file1, fxr->file1_offset, check_len, true); + if (ret) + return ret; + ret = remap_verify_area(fxr->file2, fxr->file2_offset, check_len, true); + if (ret) + return ret; /* Update cmtime if the fd/inode don't forbid it. */ if (!(fxr->file1->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode1)) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c95fe1b1de4e..b1f9f156ec88 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1404,8 +1404,11 @@ xfs_inactive( goto out; /* Try to clean out the cow blocks if there are any. */ - if (xfs_inode_has_cow_data(ip)) - xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); + if (xfs_inode_has_cow_data(ip)) { + error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); + if (error) + goto out; + } if (VFS_I(ip)->i_nlink != 0) { /* diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 50fa3ef89f6c..d61460309a78 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -976,10 +976,8 @@ xfs_dax_write_iomap_end( if (!xfs_is_cow_inode(ip)) return 0; - if (!written) { - xfs_reflink_cancel_cow_range(ip, pos, length, true); - return 0; - } + if (!written) + return xfs_reflink_cancel_cow_range(ip, pos, length, true); return xfs_reflink_end_cow(ip, pos, written); } diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 54504013c749..02a4adb4a999 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -1038,6 +1038,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) *(.discard) \ *(.discard.*) \ *(.export_symbol) \ + *(.no_trim_symbol) \ *(.modinfo) \ /* ld.bfd warns about .gnu.version* even when not emitted */ \ *(.gnu.version*) \ diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index f77fe1531cf8..9732f514566d 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -32,6 +32,7 @@ #include <linux/dynamic_debug.h> #include <drm/drm.h> +#include <drm/drm_device.h> struct debugfs_regset32; struct drm_device; diff --git a/include/linux/compiler.h b/include/linux/compiler.h index b087de2f3e94..200fd3c5bc70 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -191,6 +191,25 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, __v; \ }) +#ifdef __CHECKER__ +#define __BUILD_BUG_ON_ZERO_MSG(e, msg) (0) +#else /* __CHECKER__ */ +#define __BUILD_BUG_ON_ZERO_MSG(e, msg) ((int)sizeof(struct {_Static_assert(!(e), msg);})) +#endif /* __CHECKER__ */ + +/* &a[0] degrades to a pointer: a different type from an array */ +#define __is_array(a) (!__same_type((a), &(a)[0])) +#define __must_be_array(a) __BUILD_BUG_ON_ZERO_MSG(!__is_array(a), \ + "must be array") + +#define __is_byte_array(a) (__is_array(a) && sizeof((a)[0]) == 1) +#define __must_be_byte_array(a) __BUILD_BUG_ON_ZERO_MSG(!__is_byte_array(a), \ + "must be byte array") + +/* Require C Strings (i.e. NUL-terminated) lack the "nonstring" attribute. */ +#define __must_be_cstr(p) \ + __BUILD_BUG_ON_ZERO_MSG(__annotated(p, nonstring), "must be cstr (NUL-terminated)") + #endif /* __KERNEL__ */ /** @@ -231,19 +250,6 @@ static inline void *offset_to_ptr(const int *off) #define __ADDRESSABLE_ASM_STR(sym) __stringify(__ADDRESSABLE_ASM(sym)) -#ifdef __CHECKER__ -#define __BUILD_BUG_ON_ZERO_MSG(e, msg) (0) -#else /* __CHECKER__ */ -#define __BUILD_BUG_ON_ZERO_MSG(e, msg) ((int)sizeof(struct {_Static_assert(!(e), msg);})) -#endif /* __CHECKER__ */ - -/* &a[0] degrades to a pointer: a different type from an array */ -#define __must_be_array(a) __BUILD_BUG_ON_ZERO_MSG(__same_type((a), &(a)[0]), "must be array") - -/* Require C Strings (i.e. NUL-terminated) lack the "nonstring" attribute. */ -#define __must_be_cstr(p) \ - __BUILD_BUG_ON_ZERO_MSG(__annotated(p, nonstring), "must be cstr (NUL-terminated)") - /* * This returns a constant expression while determining if an argument is * a constant expression, most importantly without evaluating the argument. diff --git a/include/linux/fs.h b/include/linux/fs.h index be3ad155ec9f..2c3b2f8a621f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -222,7 +222,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_FSNOTIFY_HSM(mode) 0 #endif - /* * Attribute flags. These should be or-ed together to figure out what * has been changed! @@ -791,6 +790,19 @@ struct inode { static inline void inode_set_cached_link(struct inode *inode, char *link, int linklen) { + int testlen; + + /* + * TODO: patch it into a debug-only check if relevant macros show up. + * In the meantime, since we are suffering strlen even on production kernels + * to find the right length, do a fixup if the wrong value got passed. + */ + testlen = strlen(link); + if (testlen != linklen) { + WARN_ONCE(1, "bad length passed for symlink [%s] (got %d, expected %d)", + link, linklen, testlen); + linklen = testlen; + } inode->i_link = link; inode->i_linklen = linklen; inode->i_opflags |= IOP_CACHED_LINK; @@ -3140,6 +3152,12 @@ static inline void exe_file_allow_write_access(struct file *exe_file) allow_write_access(exe_file); } +static inline void file_set_fsnotify_mode(struct file *file, fmode_t mode) +{ + file->f_mode &= ~FMODE_FSNOTIFY_MASK; + file->f_mode |= mode; +} + static inline bool inode_is_open_for_write(const struct inode *inode) { return atomic_read(&inode->i_writecount) > 0; diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 1a9ef8f6784d..6a33288bd6a1 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -129,7 +129,7 @@ static inline int fsnotify_file(struct file *file, __u32 mask) #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS -void file_set_fsnotify_mode(struct file *file); +void file_set_fsnotify_mode_from_watchers(struct file *file); /* * fsnotify_file_area_perm - permission hook before access to file range @@ -213,7 +213,7 @@ static inline int fsnotify_open_perm(struct file *file) } #else -static inline void file_set_fsnotify_mode(struct file *file) +static inline void file_set_fsnotify_mode_from_watchers(struct file *file) { } diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h index c3b4b7ed7c16..84a5045f80f3 100644 --- a/include/linux/hrtimer_defs.h +++ b/include/linux/hrtimer_defs.h @@ -125,6 +125,7 @@ struct hrtimer_cpu_base { ktime_t softirq_expires_next; struct hrtimer *softirq_next_timer; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; + call_single_data_t csd; } ____cacheline_aligned; diff --git a/include/linux/i2c.h b/include/linux/i2c.h index c31fd1dba3bd..2b2af24d2a43 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -244,6 +244,7 @@ enum i2c_driver_flags { * @id_table: List of I2C devices supported by this driver * @detect: Callback for device detection * @address_list: The I2C addresses to probe (for detect) + * @clients: List of detected clients we created (for i2c-core use only) * @flags: A bitmask of flags defined in &enum i2c_driver_flags * * The driver.owner field should be set to the module owner of this driver. @@ -298,6 +299,7 @@ struct i2c_driver { /* Device detection callback for automatic device creation */ int (*detect)(struct i2c_client *client, struct i2c_board_info *info); const unsigned short *address_list; + struct list_head clients; u32 flags; }; @@ -313,6 +315,8 @@ struct i2c_driver { * @dev: Driver model device node for the slave. * @init_irq: IRQ that was set at initialization * @irq: indicates the IRQ generated by this device (if any) + * @detected: member of an i2c_driver.clients list or i2c-core's + * userspace_devices list * @slave_cb: Callback when I2C slave mode of an adapter is used. The adapter * calls it to pass on slave events to the slave driver. * @devres_group_id: id of the devres group that will be created for resources @@ -332,8 +336,6 @@ struct i2c_client { #define I2C_CLIENT_SLAVE 0x20 /* we are the slave */ #define I2C_CLIENT_HOST_NOTIFY 0x40 /* We want to use I2C host notify */ #define I2C_CLIENT_WAKE 0x80 /* for board_info; true iff can wake */ -#define I2C_CLIENT_AUTO 0x100 /* client was auto-detected */ -#define I2C_CLIENT_USER 0x200 /* client was userspace-created */ #define I2C_CLIENT_SCCB 0x9000 /* Use Omnivision SCCB protocol */ /* Must match I2C_M_STOP|IGNORE_NAK */ @@ -345,6 +347,7 @@ struct i2c_client { struct device dev; /* the device structure */ int init_irq; /* irq set at initialization */ int irq; /* irq issued by device */ + struct list_head detected; #if IS_ENABLED(CONFIG_I2C_SLAVE) i2c_slave_cb_t slave_cb; /* callback for slave mode */ #endif @@ -751,6 +754,9 @@ struct i2c_adapter { char name[48]; struct completion dev_released; + struct mutex userspace_clients_lock; + struct list_head userspace_clients; + struct i2c_bus_recovery_info *bus_recovery_info; const struct i2c_adapter_quirks *quirks; diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index ed945f42e064..0ea8c9887429 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -537,7 +537,7 @@ static __always_inline unsigned long msecs_to_jiffies(const unsigned int m) * * Return: jiffies value */ -#define secs_to_jiffies(_secs) ((_secs) * HZ) +#define secs_to_jiffies(_secs) (unsigned long)((_secs) * HZ) extern unsigned long __usecs_to_jiffies(const unsigned int u); #if !(USEC_PER_SEC % HZ) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3cb9a32a6330..f34f4cfaa513 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1615,7 +1615,6 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu); -int kvm_arch_post_init_vm(struct kvm *kvm); void kvm_arch_pre_destroy_vm(struct kvm *kvm); void kvm_arch_create_vm_debugfs(struct kvm *kvm); diff --git a/include/linux/lockref.h b/include/linux/lockref.h index c39f119659ba..676721ee878d 100644 --- a/include/linux/lockref.h +++ b/include/linux/lockref.h @@ -37,12 +37,13 @@ struct lockref { /** * lockref_init - Initialize a lockref * @lockref: pointer to lockref structure - * @count: initial count + * + * Initializes @lockref->count to 1. */ -static inline void lockref_init(struct lockref *lockref, unsigned int count) +static inline void lockref_init(struct lockref *lockref) { spin_lock_init(&lockref->lock); - lockref->count = count; + lockref->count = 1; } void lockref_get(struct lockref *lockref); diff --git a/include/linux/module.h b/include/linux/module.h index 23792d5d7b74..30e5b19bafa9 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -306,7 +306,10 @@ extern int modules_disabled; /* for sysctl */ /* Get/put a kernel symbol (calls must be symmetric) */ void *__symbol_get(const char *symbol); void *__symbol_get_gpl(const char *symbol); -#define symbol_get(x) ((typeof(&x))(__symbol_get(__stringify(x)))) +#define symbol_get(x) ({ \ + static const char __notrim[] \ + __used __section(".no_trim_symbol") = __stringify(x); \ + (typeof(&x))(__symbol_get(__stringify(x))); }) /* modules using other modules: kdb wants to see this. */ struct module_use { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2a59034a5fa2..03bb584c62cf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2904,9 +2904,9 @@ struct pcpu_sw_netstats { struct pcpu_dstats { u64_stats_t rx_packets; u64_stats_t rx_bytes; - u64_stats_t rx_drops; u64_stats_t tx_packets; u64_stats_t tx_bytes; + u64_stats_t rx_drops; u64_stats_t tx_drops; struct u64_stats_sync syncp; } __aligned(8 * sizeof(u64)); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 1aab31370065..d56a78af4af1 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -261,6 +261,7 @@ struct generic_pm_domain_data { unsigned int rpm_pstate; unsigned int opp_token; bool hw_mode; + bool rpm_always_on; void *data; }; @@ -293,6 +294,7 @@ ktime_t dev_pm_genpd_get_next_hrtimer(struct device *dev); void dev_pm_genpd_synced_poweroff(struct device *dev); int dev_pm_genpd_set_hwmode(struct device *dev, bool enable); bool dev_pm_genpd_get_hwmode(struct device *dev); +int dev_pm_genpd_rpm_always_on(struct device *dev, bool on); extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; @@ -376,6 +378,11 @@ static inline bool dev_pm_genpd_get_hwmode(struct device *dev) return false; } +static inline int dev_pm_genpd_rpm_always_on(struct device *dev, bool on) +{ + return -EOPNOTSUPP; +} + #define simple_qos_governor (*(struct dev_power_governor *)(NULL)) #define pm_domain_always_on_gov (*(struct dev_power_governor *)(NULL)) #endif diff --git a/include/linux/string.h b/include/linux/string.h index 86d5d352068b..f8e21e80942f 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -414,7 +414,8 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, * must be discoverable by the compiler. */ #define strtomem_pad(dest, src, pad) do { \ - const size_t _dest_len = __builtin_object_size(dest, 1); \ + const size_t _dest_len = __must_be_byte_array(dest) + \ + ARRAY_SIZE(dest); \ const size_t _src_len = __builtin_object_size(src, 1); \ \ BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \ @@ -437,7 +438,8 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, * must be discoverable by the compiler. */ #define strtomem(dest, src) do { \ - const size_t _dest_len = __builtin_object_size(dest, 1); \ + const size_t _dest_len = __must_be_byte_array(dest) + \ + ARRAY_SIZE(dest); \ const size_t _src_len = __builtin_object_size(src, 1); \ \ BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \ @@ -456,7 +458,8 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, * Note that sizes of @dest and @src must be known at compile-time. */ #define memtostr(dest, src) do { \ - const size_t _dest_len = __builtin_object_size(dest, 1); \ + const size_t _dest_len = __must_be_byte_array(dest) + \ + ARRAY_SIZE(dest); \ const size_t _src_len = __builtin_object_size(src, 1); \ const size_t _src_chars = strnlen(src, _src_len); \ const size_t _copy_len = min(_dest_len - 1, _src_chars); \ @@ -481,7 +484,8 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, * Note that sizes of @dest and @src must be known at compile-time. */ #define memtostr_pad(dest, src) do { \ - const size_t _dest_len = __builtin_object_size(dest, 1); \ + const size_t _dest_len = __must_be_byte_array(dest) + \ + ARRAY_SIZE(dest); \ const size_t _src_len = __builtin_object_size(src, 1); \ const size_t _src_chars = strnlen(src, _src_len); \ const size_t _copy_len = min(_dest_len - 1, _src_chars); \ diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d635c5b47eba..d48c657191cd 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -851,7 +851,7 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, } static inline void _bstats_update(struct gnet_stats_basic_sync *bstats, - __u64 bytes, __u32 packets) + __u64 bytes, __u64 packets) { u64_stats_update_begin(&bstats->syncp); u64_stats_add(&bstats->bytes, bytes); diff --git a/include/scsi/libiscsi_tcp.h b/include/scsi/libiscsi_tcp.h index 7c8ba9d7378b..ef53d4bea28a 100644 --- a/include/scsi/libiscsi_tcp.h +++ b/include/scsi/libiscsi_tcp.h @@ -15,7 +15,6 @@ struct iscsi_tcp_conn; struct iscsi_segment; struct sk_buff; -struct ahash_request; typedef int iscsi_segment_done_fn_t(struct iscsi_tcp_conn *, struct iscsi_segment *); @@ -27,7 +26,7 @@ struct iscsi_segment { unsigned int total_size; unsigned int total_copied; - struct ahash_request *hash; + u32 *crcp; unsigned char padbuf[ISCSI_PAD_LEN]; unsigned char recv_digest[ISCSI_DIGEST_SIZE]; unsigned char digest[ISCSI_DIGEST_SIZE]; @@ -61,8 +60,8 @@ struct iscsi_tcp_conn { * stop to terminate */ /* control data */ struct iscsi_tcp_recv in; /* TCP receive context */ - /* CRC32C (Rx) LLD should set this is they do not offload */ - struct ahash_request *rx_hash; + /* CRC32C (Rx) LLD should set this if they do not offload */ + u32 *rx_crcp; }; struct iscsi_tcp_task { @@ -99,18 +98,15 @@ extern void iscsi_tcp_segment_unmap(struct iscsi_segment *segment); extern void iscsi_segment_init_linear(struct iscsi_segment *segment, void *data, size_t size, - iscsi_segment_done_fn_t *done, - struct ahash_request *hash); + iscsi_segment_done_fn_t *done, u32 *crcp); extern int iscsi_segment_seek_sg(struct iscsi_segment *segment, struct scatterlist *sg_list, unsigned int sg_count, unsigned int offset, size_t size, - iscsi_segment_done_fn_t *done, - struct ahash_request *hash); + iscsi_segment_done_fn_t *done, u32 *crcp); /* digest helpers */ -extern void iscsi_tcp_dgst_header(struct ahash_request *hash, const void *hdr, - size_t hdrlen, +extern void iscsi_tcp_dgst_header(const void *hdr, size_t hdrlen, unsigned char digest[ISCSI_DIGEST_SIZE]); extern struct iscsi_cls_conn * iscsi_tcp_conn_setup(struct iscsi_cls_session *cls_session, int dd_data_size, diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 7acd0ec82bb0..68dd49947d04 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -247,6 +247,9 @@ struct scsi_device { unsigned int queue_stopped; /* request queue is quiesced */ bool offline_already; /* Device offline message logged */ + unsigned int ua_new_media_ctr; /* Counter for New Media UNIT ATTENTIONs */ + unsigned int ua_por_ctr; /* Counter for Power On / Reset UAs */ + atomic_t disk_events_disable_depth; /* disable depth for disk events */ DECLARE_BITMAP(supported_events, SDEV_EVT_MAXBITS); /* supported events */ @@ -684,6 +687,12 @@ static inline int scsi_device_busy(struct scsi_device *sdev) return sbitmap_weight(&sdev->budget_map); } +/* Macros to access the UNIT ATTENTION counters */ +#define scsi_get_ua_new_media_ctr(sdev) \ + ((const unsigned int)(sdev->ua_new_media_ctr)) +#define scsi_get_ua_por_ctr(sdev) \ + ((const unsigned int)(sdev->ua_por_ctr)) + #define MODULE_ALIAS_SCSI_DEVICE(type) \ MODULE_ALIAS("scsi:t-" __stringify(type) "*") #define SCSI_DEVICE_MODALIAS_FMT "scsi:t-0x%02x" diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h index 70e1262b2e20..aeca37816506 100644 --- a/include/scsi/scsi_proto.h +++ b/include/scsi/scsi_proto.h @@ -33,8 +33,8 @@ #define INQUIRY 0x12 #define RECOVER_BUFFERED_DATA 0x14 #define MODE_SELECT 0x15 -#define RESERVE 0x16 -#define RELEASE 0x17 +#define RESERVE_6 0x16 +#define RELEASE_6 0x17 #define COPY 0x18 #define ERASE 0x19 #define MODE_SENSE 0x1a diff --git a/include/soc/rockchip/rockchip_sip.h b/include/soc/rockchip/rockchip_sip.h index c46a9ae2a2ab..501ad1fedb20 100644 --- a/include/soc/rockchip/rockchip_sip.h +++ b/include/soc/rockchip/rockchip_sip.h @@ -6,6 +6,9 @@ #ifndef __SOC_ROCKCHIP_SIP_H #define __SOC_ROCKCHIP_SIP_H +#define ROCKCHIP_SIP_SUSPEND_MODE 0x82000003 +#define ROCKCHIP_SLEEP_PD_CONFIG 0xff + #define ROCKCHIP_SIP_DRAM_FREQ 0x82000008 #define ROCKCHIP_SIP_CONFIG_DRAM_INIT 0x00 #define ROCKCHIP_SIP_CONFIG_DRAM_SET_RATE 0x01 diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 2f119d18a061..cad50d91077e 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -219,6 +219,7 @@ EM(rxrpc_conn_get_conn_input, "GET inp-conn") \ EM(rxrpc_conn_get_idle, "GET idle ") \ EM(rxrpc_conn_get_poke_abort, "GET pk-abort") \ + EM(rxrpc_conn_get_poke_secured, "GET secured ") \ EM(rxrpc_conn_get_poke_timer, "GET poke ") \ EM(rxrpc_conn_get_service_conn, "GET svc-conn") \ EM(rxrpc_conn_new_client, "NEW client ") \ diff --git a/include/trace/events/scsi.h b/include/trace/events/scsi.h index 05f1945ed204..bf6cc98d9122 100644 --- a/include/trace/events/scsi.h +++ b/include/trace/events/scsi.h @@ -29,8 +29,8 @@ scsi_opcode_name(INQUIRY), \ scsi_opcode_name(RECOVER_BUFFERED_DATA), \ scsi_opcode_name(MODE_SELECT), \ - scsi_opcode_name(RESERVE), \ - scsi_opcode_name(RELEASE), \ + scsi_opcode_name(RESERVE_6), \ + scsi_opcode_name(RELEASE_6), \ scsi_opcode_name(COPY), \ scsi_opcode_name(ERASE), \ scsi_opcode_name(MODE_SENSE), \ diff --git a/include/trace/events/target.h b/include/trace/events/target.h index a13cbf2b3405..7e2e20ba26f1 100644 --- a/include/trace/events/target.h +++ b/include/trace/events/target.h @@ -31,8 +31,8 @@ scsi_opcode_name(INQUIRY), \ scsi_opcode_name(RECOVER_BUFFERED_DATA), \ scsi_opcode_name(MODE_SELECT), \ - scsi_opcode_name(RESERVE), \ - scsi_opcode_name(RELEASE), \ + scsi_opcode_name(RESERVE_6), \ + scsi_opcode_name(RELEASE_6), \ scsi_opcode_name(COPY), \ scsi_opcode_name(ERASE), \ scsi_opcode_name(MODE_SENSE), \ diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index efe5de6ce208..aaa4f3bc688b 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -411,13 +411,20 @@ struct drm_amdgpu_gem_userptr { /* GFX12 and later: */ #define AMDGPU_TILING_GFX12_SWIZZLE_MODE_SHIFT 0 #define AMDGPU_TILING_GFX12_SWIZZLE_MODE_MASK 0x7 -/* These are DCC recompression setting for memory management: */ +/* These are DCC recompression settings for memory management: */ #define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT 3 #define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3 /* 0:64B, 1:128B, 2:256B */ #define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_SHIFT 5 #define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_MASK 0x7 /* CB_COLOR0_INFO.NUMBER_TYPE */ #define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_SHIFT 8 #define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_MASK 0x3f /* [0:4]:CB_COLOR0_INFO.FORMAT, [5]:MM */ +/* When clearing the buffer or moving it from VRAM to GTT, don't compress and set DCC metadata + * to uncompressed. Set when parts of an allocation bypass DCC and read raw data. */ +#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_SHIFT 14 +#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_MASK 0x1 +/* bit gap */ +#define AMDGPU_TILING_GFX12_SCANOUT_SHIFT 63 +#define AMDGPU_TILING_GFX12_SCANOUT_MASK 0x1 /* Set/Get helpers for tiling flags. */ #define AMDGPU_TILING_SET(field, value) \ diff --git a/include/uapi/scsi/cxlflash_ioctl.h b/include/uapi/scsi/cxlflash_ioctl.h deleted file mode 100644 index 513da47aa5ab..000000000000 --- a/include/uapi/scsi/cxlflash_ioctl.h +++ /dev/null @@ -1,276 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ -/* - * CXL Flash Device Driver - * - * Written by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>, IBM Corporation - * Matthew R. Ochs <mrochs@linux.vnet.ibm.com>, IBM Corporation - * - * Copyright (C) 2015 IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef _CXLFLASH_IOCTL_H -#define _CXLFLASH_IOCTL_H - -#include <linux/types.h> - -/* - * Structure and definitions for all CXL Flash ioctls - */ -#define CXLFLASH_WWID_LEN 16 - -/* - * Structure and flag definitions CXL Flash superpipe ioctls - */ - -#define DK_CXLFLASH_VERSION_0 0 - -struct dk_cxlflash_hdr { - __u16 version; /* Version data */ - __u16 rsvd[3]; /* Reserved for future use */ - __u64 flags; /* Input flags */ - __u64 return_flags; /* Returned flags */ -}; - -/* - * Return flag definitions available to all superpipe ioctls - * - * Similar to the input flags, these are grown from the bottom-up with the - * intention that ioctl-specific return flag definitions would grow from the - * top-down, allowing the two sets to co-exist. While not required/enforced - * at this time, this provides future flexibility. - */ -#define DK_CXLFLASH_ALL_PORTS_ACTIVE 0x0000000000000001ULL -#define DK_CXLFLASH_APP_CLOSE_ADAP_FD 0x0000000000000002ULL -#define DK_CXLFLASH_CONTEXT_SQ_CMD_MODE 0x0000000000000004ULL - -/* - * General Notes: - * ------------- - * The 'context_id' field of all ioctl structures contains the context - * identifier for a context in the lower 32-bits (upper 32-bits are not - * to be used when identifying a context to the AFU). That said, the value - * in its entirety (all 64-bits) is to be treated as an opaque cookie and - * should be presented as such when issuing ioctls. - */ - -/* - * DK_CXLFLASH_ATTACH Notes: - * ------------------------ - * Read/write access permissions are specified via the O_RDONLY, O_WRONLY, - * and O_RDWR flags defined in the fcntl.h header file. - * - * A valid adapter file descriptor (fd >= 0) is only returned on the initial - * attach (successful) of a context. When a context is shared(reused), the user - * is expected to already 'know' the adapter file descriptor associated with the - * context. - */ -#define DK_CXLFLASH_ATTACH_REUSE_CONTEXT 0x8000000000000000ULL - -struct dk_cxlflash_attach { - struct dk_cxlflash_hdr hdr; /* Common fields */ - __u64 num_interrupts; /* Requested number of interrupts */ - __u64 context_id; /* Returned context */ - __u64 mmio_size; /* Returned size of MMIO area */ - __u64 block_size; /* Returned block size, in bytes */ - __u64 adap_fd; /* Returned adapter file descriptor */ - __u64 last_lba; /* Returned last LBA on the device */ - __u64 max_xfer; /* Returned max transfer size, blocks */ - __u64 reserved[8]; /* Reserved for future use */ -}; - -struct dk_cxlflash_detach { - struct dk_cxlflash_hdr hdr; /* Common fields */ - __u64 context_id; /* Context to detach */ - __u64 reserved[8]; /* Reserved for future use */ -}; - -struct dk_cxlflash_udirect { - struct dk_cxlflash_hdr hdr; /* Common fields */ - __u64 context_id; /* Context to own physical resources */ - __u64 rsrc_handle; /* Returned resource handle */ - __u64 last_lba; /* Returned last LBA on the device */ - __u64 reserved[8]; /* Reserved for future use */ -}; - -#define DK_CXLFLASH_UVIRTUAL_NEED_WRITE_SAME 0x8000000000000000ULL - -struct dk_cxlflash_uvirtual { - struct dk_cxlflash_hdr hdr; /* Common fields */ - __u64 context_id; /* Context to own virtual resources */ - __u64 lun_size; /* Requested size, in 4K blocks */ - __u64 rsrc_handle; /* Returned resource handle */ - __u64 last_lba; /* Returned last LBA of LUN */ - __u64 reserved[8]; /* Reserved for future use */ -}; - -struct dk_cxlflash_release { - struct dk_cxlflash_hdr hdr; /* Common fields */ - __u64 context_id; /* Context owning resources */ - __u64 rsrc_handle; /* Resource handle to release */ - __u64 reserved[8]; /* Reserved for future use */ -}; - -struct dk_cxlflash_resize { - struct dk_cxlflash_hdr hdr; /* Common fields */ - __u64 context_id; /* Context owning resources */ - __u64 rsrc_handle; /* Resource handle of LUN to resize */ - __u64 req_size; /* New requested size, in 4K blocks */ - __u64 last_lba; /* Returned last LBA of LUN */ - __u64 reserved[8]; /* Reserved for future use */ -}; - -struct dk_cxlflash_clone { - struct dk_cxlflash_hdr hdr; /* Common fields */ - __u64 context_id_src; /* Context to clone from */ - __u64 context_id_dst; /* Context to clone to */ - __u64 adap_fd_src; /* Source context adapter fd */ - __u64 reserved[8]; /* Reserved for future use */ -}; - -#define DK_CXLFLASH_VERIFY_SENSE_LEN 18 -#define DK_CXLFLASH_VERIFY_HINT_SENSE 0x8000000000000000ULL - -struct dk_cxlflash_verify { - struct dk_cxlflash_hdr hdr; /* Common fields */ - __u64 context_id; /* Context owning resources to verify */ - __u64 rsrc_handle; /* Resource handle of LUN */ - __u64 hint; /* Reasons for verify */ - __u64 last_lba; /* Returned last LBA of device */ - __u8 sense_data[DK_CXLFLASH_VERIFY_SENSE_LEN]; /* SCSI sense data */ - __u8 pad[6]; /* Pad to next 8-byte boundary */ - __u64 reserved[8]; /* Reserved for future use */ -}; - -#define DK_CXLFLASH_RECOVER_AFU_CONTEXT_RESET 0x8000000000000000ULL - -struct dk_cxlflash_recover_afu { - struct dk_cxlflash_hdr hdr; /* Common fields */ - __u64 reason; /* Reason for recovery request */ - __u64 context_id; /* Context to recover / updated ID */ - __u64 mmio_size; /* Returned size of MMIO area */ - __u64 adap_fd; /* Returned adapter file descriptor */ - __u64 reserved[8]; /* Reserved for future use */ -}; - -#define DK_CXLFLASH_MANAGE_LUN_WWID_LEN CXLFLASH_WWID_LEN -#define DK_CXLFLASH_MANAGE_LUN_ENABLE_SUPERPIPE 0x8000000000000000ULL -#define DK_CXLFLASH_MANAGE_LUN_DISABLE_SUPERPIPE 0x4000000000000000ULL -#define DK_CXLFLASH_MANAGE_LUN_ALL_PORTS_ACCESSIBLE 0x2000000000000000ULL - -struct dk_cxlflash_manage_lun { - struct dk_cxlflash_hdr hdr; /* Common fields */ - __u8 wwid[DK_CXLFLASH_MANAGE_LUN_WWID_LEN]; /* Page83 WWID, NAA-6 */ - __u64 reserved[8]; /* Rsvd, future use */ -}; - -union cxlflash_ioctls { - struct dk_cxlflash_attach attach; - struct dk_cxlflash_detach detach; - struct dk_cxlflash_udirect udirect; - struct dk_cxlflash_uvirtual uvirtual; - struct dk_cxlflash_release release; - struct dk_cxlflash_resize resize; - struct dk_cxlflash_clone clone; - struct dk_cxlflash_verify verify; - struct dk_cxlflash_recover_afu recover_afu; - struct dk_cxlflash_manage_lun manage_lun; -}; - -#define MAX_CXLFLASH_IOCTL_SZ (sizeof(union cxlflash_ioctls)) - -#define CXL_MAGIC 0xCA -#define CXL_IOWR(_n, _s) _IOWR(CXL_MAGIC, _n, struct _s) - -/* - * CXL Flash superpipe ioctls start at base of the reserved CXL_MAGIC - * region (0x80) and grow upwards. - */ -#define DK_CXLFLASH_ATTACH CXL_IOWR(0x80, dk_cxlflash_attach) -#define DK_CXLFLASH_USER_DIRECT CXL_IOWR(0x81, dk_cxlflash_udirect) -#define DK_CXLFLASH_RELEASE CXL_IOWR(0x82, dk_cxlflash_release) -#define DK_CXLFLASH_DETACH CXL_IOWR(0x83, dk_cxlflash_detach) -#define DK_CXLFLASH_VERIFY CXL_IOWR(0x84, dk_cxlflash_verify) -#define DK_CXLFLASH_RECOVER_AFU CXL_IOWR(0x85, dk_cxlflash_recover_afu) -#define DK_CXLFLASH_MANAGE_LUN CXL_IOWR(0x86, dk_cxlflash_manage_lun) -#define DK_CXLFLASH_USER_VIRTUAL CXL_IOWR(0x87, dk_cxlflash_uvirtual) -#define DK_CXLFLASH_VLUN_RESIZE CXL_IOWR(0x88, dk_cxlflash_resize) -#define DK_CXLFLASH_VLUN_CLONE CXL_IOWR(0x89, dk_cxlflash_clone) - -/* - * Structure and flag definitions CXL Flash host ioctls - */ - -#define HT_CXLFLASH_VERSION_0 0 - -struct ht_cxlflash_hdr { - __u16 version; /* Version data */ - __u16 subcmd; /* Sub-command */ - __u16 rsvd[2]; /* Reserved for future use */ - __u64 flags; /* Input flags */ - __u64 return_flags; /* Returned flags */ -}; - -/* - * Input flag definitions available to all host ioctls - * - * These are grown from the bottom-up with the intention that ioctl-specific - * input flag definitions would grow from the top-down, allowing the two sets - * to co-exist. While not required/enforced at this time, this provides future - * flexibility. - */ -#define HT_CXLFLASH_HOST_READ 0x0000000000000000ULL -#define HT_CXLFLASH_HOST_WRITE 0x0000000000000001ULL - -#define HT_CXLFLASH_LUN_PROVISION_SUBCMD_CREATE_LUN 0x0001 -#define HT_CXLFLASH_LUN_PROVISION_SUBCMD_DELETE_LUN 0x0002 -#define HT_CXLFLASH_LUN_PROVISION_SUBCMD_QUERY_PORT 0x0003 - -struct ht_cxlflash_lun_provision { - struct ht_cxlflash_hdr hdr; /* Common fields */ - __u16 port; /* Target port for provision request */ - __u16 reserved16[3]; /* Reserved for future use */ - __u64 size; /* Size of LUN (4K blocks) */ - __u64 lun_id; /* SCSI LUN ID */ - __u8 wwid[CXLFLASH_WWID_LEN];/* Page83 WWID, NAA-6 */ - __u64 max_num_luns; /* Maximum number of LUNs provisioned */ - __u64 cur_num_luns; /* Current number of LUNs provisioned */ - __u64 max_cap_port; /* Total capacity for port (4K blocks) */ - __u64 cur_cap_port; /* Current capacity for port (4K blocks) */ - __u64 reserved[8]; /* Reserved for future use */ -}; - -#define HT_CXLFLASH_AFU_DEBUG_MAX_DATA_LEN 262144 /* 256K */ -#define HT_CXLFLASH_AFU_DEBUG_SUBCMD_LEN 12 -struct ht_cxlflash_afu_debug { - struct ht_cxlflash_hdr hdr; /* Common fields */ - __u8 reserved8[4]; /* Reserved for future use */ - __u8 afu_subcmd[HT_CXLFLASH_AFU_DEBUG_SUBCMD_LEN]; /* AFU subcommand, - * (pass through) - */ - __u64 data_ea; /* Data buffer effective address */ - __u32 data_len; /* Data buffer length */ - __u32 reserved32; /* Reserved for future use */ - __u64 reserved[8]; /* Reserved for future use */ -}; - -union cxlflash_ht_ioctls { - struct ht_cxlflash_lun_provision lun_provision; - struct ht_cxlflash_afu_debug afu_debug; -}; - -#define MAX_HT_CXLFLASH_IOCTL_SZ (sizeof(union cxlflash_ht_ioctls)) - -/* - * CXL Flash host ioctls start at the top of the reserved CXL_MAGIC - * region (0xBF) and grow downwards. - */ -#define HT_CXLFLASH_LUN_PROVISION CXL_IOWR(0xBF, ht_cxlflash_lun_provision) -#define HT_CXLFLASH_AFU_DEBUG CXL_IOWR(0xBE, ht_cxlflash_afu_debug) - - -#endif /* ifndef _CXLFLASH_IOCTL_H */ diff --git a/include/ufs/ufs.h b/include/ufs/ufs.h index 89672ad8c3bb..8a24ed59ec46 100644 --- a/include/ufs/ufs.h +++ b/include/ufs/ufs.h @@ -385,8 +385,8 @@ enum { /* Possible values for dExtendedUFSFeaturesSupport */ enum { - UFS_DEV_LOW_TEMP_NOTIF = BIT(4), - UFS_DEV_HIGH_TEMP_NOTIF = BIT(5), + UFS_DEV_HIGH_TEMP_NOTIF = BIT(4), + UFS_DEV_LOW_TEMP_NOTIF = BIT(5), UFS_DEV_EXT_TEMP_NOTIF = BIT(6), UFS_DEV_HPB_SUPPORT = BIT(7), UFS_DEV_WRITE_BOOSTER_SUP = BIT(8), @@ -419,6 +419,7 @@ enum { MASK_EE_TOO_LOW_TEMP = BIT(4), MASK_EE_WRITEBOOSTER_EVENT = BIT(5), MASK_EE_PERFORMANCE_THROTTLING = BIT(6), + MASK_EE_HEALTH_CRITICAL = BIT(9), }; #define MASK_EE_URGENT_TEMP (MASK_EE_TOO_HIGH_TEMP | MASK_EE_TOO_LOW_TEMP) diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 650ff238cd74..e3909cc691b2 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -336,6 +336,7 @@ struct ufs_pwr_mode_info { * @get_outstanding_cqs: called to get outstanding completion queues * @config_esi: called to config Event Specific Interrupt * @config_scsi_dev: called to configure SCSI device parameters + * @freq_to_gear_speed: called to map clock frequency to the max supported gear speed */ struct ufs_hba_variant_ops { const char *name; @@ -344,8 +345,8 @@ struct ufs_hba_variant_ops { void (*exit)(struct ufs_hba *); u32 (*get_ufs_hci_version)(struct ufs_hba *); int (*set_dma_mask)(struct ufs_hba *); - int (*clk_scale_notify)(struct ufs_hba *, bool, - enum ufs_notify_change_status); + int (*clk_scale_notify)(struct ufs_hba *, bool, unsigned long, + enum ufs_notify_change_status); int (*setup_clocks)(struct ufs_hba *, bool, enum ufs_notify_change_status); int (*hce_enable_notify)(struct ufs_hba *, @@ -353,9 +354,9 @@ struct ufs_hba_variant_ops { int (*link_startup_notify)(struct ufs_hba *, enum ufs_notify_change_status); int (*pwr_change_notify)(struct ufs_hba *, - enum ufs_notify_change_status status, - struct ufs_pa_layer_attr *desired_pwr_mode, - struct ufs_pa_layer_attr *final_params); + enum ufs_notify_change_status status, + const struct ufs_pa_layer_attr *desired_pwr_mode, + struct ufs_pa_layer_attr *final_params); void (*setup_xfer_req)(struct ufs_hba *hba, int tag, bool is_scsi_cmd); void (*setup_task_mgmt)(struct ufs_hba *, int, u8); @@ -384,6 +385,7 @@ struct ufs_hba_variant_ops { unsigned long *ocqs); int (*config_esi)(struct ufs_hba *hba); void (*config_scsi_dev)(struct scsi_device *sdev); + u32 (*freq_to_gear_speed)(struct ufs_hba *hba, unsigned long freq); }; /* clock gating state */ @@ -448,6 +450,8 @@ struct ufs_clk_gating { * one keeps track of previous power mode. * @target_freq: frequency requested by devfreq framework * @min_gear: lowest HS gear to scale down to + * @wb_gear: enable Write Booster when HS gear scales above or equal to it, else + * disable Write Booster * @is_enabled: tracks if scaling is currently enabled or not, controlled by * clkscale_enable sysfs node * @is_allowed: tracks if scaling is currently allowed or not, used to block @@ -471,6 +475,7 @@ struct ufs_clk_scaling { struct ufs_pa_layer_attr saved_pwr_info; unsigned long target_freq; u32 min_gear; + u32 wb_gear; bool is_enabled; bool is_allowed; bool is_initialized; @@ -962,6 +967,7 @@ enum ufshcd_mcq_opr { * @ufs_rtc_update_work: A work for UFS RTC periodic update * @pm_qos_req: PM QoS request handle * @pm_qos_enabled: flag to check if pm qos is enabled + * @critical_health_count: count of critical health exceptions */ struct ufs_hba { void __iomem *mmio_base; @@ -1130,6 +1136,8 @@ struct ufs_hba { struct delayed_work ufs_rtc_update_work; struct pm_qos_request pm_qos_req; bool pm_qos_enabled; + + int critical_health_count; }; /** @@ -1309,7 +1317,6 @@ static inline void ufshcd_rmwl(struct ufs_hba *hba, u32 mask, u32 val, u32 reg) void ufshcd_enable_irq(struct ufs_hba *hba); void ufshcd_disable_irq(struct ufs_hba *hba); int ufshcd_alloc_host(struct device *, struct ufs_hba **); -void ufshcd_dealloc_host(struct ufs_hba *); int ufshcd_hba_enable(struct ufs_hba *hba); int ufshcd_init(struct ufs_hba *, void __iomem *, unsigned int); int ufshcd_link_recovery(struct ufs_hba *hba); @@ -1369,6 +1376,8 @@ extern int ufshcd_system_thaw(struct device *dev); extern int ufshcd_system_restore(struct device *dev); #endif +extern int ufshcd_dme_reset(struct ufs_hba *hba); +extern int ufshcd_dme_enable(struct ufs_hba *hba); extern int ufshcd_dme_configure_adapt(struct ufs_hba *hba, int agreed_gear, int adapt_val); @@ -1426,7 +1435,7 @@ static inline int ufshcd_dme_peer_get(struct ufs_hba *hba, return ufshcd_dme_get_attr(hba, attr_sel, mib_val, DME_PEER); } -static inline bool ufshcd_is_hs_mode(struct ufs_pa_layer_attr *pwr_info) +static inline bool ufshcd_is_hs_mode(const struct ufs_pa_layer_attr *pwr_info) { return (pwr_info->pwr_rx == FAST_MODE || pwr_info->pwr_rx == FASTAUTO_MODE) && diff --git a/io_uring/futex.c b/io_uring/futex.c index 3159a2b7eeca..43e2143255f5 100644 --- a/io_uring/futex.c +++ b/io_uring/futex.c @@ -338,7 +338,7 @@ int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags) hlist_add_head(&req->hash_node, &ctx->futex_list); io_ring_submit_unlock(ctx, issue_flags); - futex_queue(&ifd->q, hb); + futex_queue(&ifd->q, hb, NULL); return IOU_ISSUE_SKIP_COMPLETE; } diff --git a/kernel/futex/core.c b/kernel/futex/core.c index ebdd76b4ecbb..3db8567f5a44 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -532,7 +532,8 @@ void futex_q_unlock(struct futex_hash_bucket *hb) futex_hb_waiters_dec(hb); } -void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb) +void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb, + struct task_struct *task) { int prio; @@ -548,7 +549,7 @@ void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb) plist_node_init(&q->list, prio); plist_add(&q->list, &hb->chain); - q->task = current; + q->task = task; } /** diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index 99b32e728c4a..6b2f4c7eb720 100644 --- a/kernel/futex/futex.h +++ b/kernel/futex/futex.h @@ -285,13 +285,15 @@ static inline int futex_get_value_locked(u32 *dest, u32 __user *from) } extern void __futex_unqueue(struct futex_q *q); -extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb); +extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb, + struct task_struct *task); extern int futex_unqueue(struct futex_q *q); /** * futex_queue() - Enqueue the futex_q on the futex_hash_bucket * @q: The futex_q to enqueue * @hb: The destination hash bucket + * @task: Task queueing this futex * * The hb->lock must be held by the caller, and is released here. A call to * futex_queue() is typically paired with exactly one call to futex_unqueue(). The @@ -299,11 +301,14 @@ extern int futex_unqueue(struct futex_q *q); * or nothing if the unqueue is done as part of the wake process and the unqueue * state is implicit in the state of woken task (see futex_wait_requeue_pi() for * an example). + * + * Note that @task may be NULL, for async usage of futexes. */ -static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb) +static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb, + struct task_struct *task) __releases(&hb->lock) { - __futex_queue(q, hb); + __futex_queue(q, hb, task); spin_unlock(&hb->lock); } diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c index daea650b16f5..7a941845f7ee 100644 --- a/kernel/futex/pi.c +++ b/kernel/futex/pi.c @@ -982,7 +982,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl /* * Only actually queue now that the atomic ops are done: */ - __futex_queue(&q, hb); + __futex_queue(&q, hb, current); if (trylock) { ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex); diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c index eb86a7ade06a..25877d4f2f8f 100644 --- a/kernel/futex/waitwake.c +++ b/kernel/futex/waitwake.c @@ -349,7 +349,7 @@ void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q, * access to the hash list and forcing another memory barrier. */ set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); - futex_queue(q, hb); + futex_queue(q, hb, current); /* Arm the timer */ if (timeout) @@ -460,7 +460,7 @@ int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *woken) * next futex. Queue each futex at this moment so hb can * be unlocked. */ - futex_queue(q, hb); + futex_queue(q, hb, current); continue; } diff --git a/kernel/kthread.c b/kernel/kthread.c index 4005b13ebd7f..5dc5b0d7238e 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -859,7 +859,7 @@ int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask) struct kthread *kthread = to_kthread(p); cpumask_var_t affinity; unsigned long flags; - int ret; + int ret = 0; if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE) || kthread->started) { WARN_ON(1); @@ -892,7 +892,7 @@ int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask) out: free_cpumask_var(affinity); - return 0; + return ret; } /* diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index fd7e85220715..ef047add7f9e 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -1262,6 +1262,8 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, if (task_has_dl_policy(p)) { P(dl.runtime); P(dl.deadline); + } else if (fair_policy(p->policy)) { + P(se.slice); } #ifdef CONFIG_SCHED_CLASS_EXT __PS("ext.enabled", task_on_scx(p)); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ce2e94ccad0c..1c0ef435a7aa 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5385,6 +5385,15 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); static void set_delayed(struct sched_entity *se) { se->sched_delayed = 1; + + /* + * Delayed se of cfs_rq have no tasks queued on them. + * Do not adjust h_nr_runnable since dequeue_entities() + * will account it for blocked tasks. + */ + if (!entity_is_task(se)) + return; + for_each_sched_entity(se) { struct cfs_rq *cfs_rq = cfs_rq_of(se); @@ -5397,6 +5406,16 @@ static void set_delayed(struct sched_entity *se) static void clear_delayed(struct sched_entity *se) { se->sched_delayed = 0; + + /* + * Delayed se of cfs_rq have no tasks queued on them. + * Do not adjust h_nr_runnable since a dequeue has + * already accounted for it or an enqueue of a task + * below it will account for it in enqueue_task_fair(). + */ + if (!entity_is_task(se)) + return; + for_each_sched_entity(se) { struct cfs_rq *cfs_rq = cfs_rq_of(se); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index f59381c4a2ff..7bbb408431eb 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -749,6 +749,15 @@ static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog, if (WARN_ON_ONCE(!fprog)) return false; + /* Our single exception to filtering. */ +#ifdef __NR_uretprobe +#ifdef SECCOMP_ARCH_COMPAT + if (sd->arch == SECCOMP_ARCH_NATIVE) +#endif + if (sd->nr == __NR_uretprobe) + return true; +#endif + for (pc = 0; pc < fprog->len; pc++) { struct sock_filter *insn = &fprog->filter[pc]; u16 code = insn->code; @@ -1023,6 +1032,9 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action, */ static const int mode1_syscalls[] = { __NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn, +#ifdef __NR_uretprobe + __NR_uretprobe, +#endif -1, /* negative terminated */ }; diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 7304d7cf47f2..2a7802ec480c 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -373,16 +373,18 @@ void clocksource_verify_percpu(struct clocksource *cs) cpumask_clear(&cpus_ahead); cpumask_clear(&cpus_behind); cpus_read_lock(); - preempt_disable(); + migrate_disable(); clocksource_verify_choose_cpus(); if (cpumask_empty(&cpus_chosen)) { - preempt_enable(); + migrate_enable(); cpus_read_unlock(); pr_warn("Not enough CPUs to check clocksource '%s'.\n", cs->name); return; } testcpu = smp_processor_id(); - pr_warn("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n", cs->name, testcpu, cpumask_pr_args(&cpus_chosen)); + pr_info("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n", + cs->name, testcpu, cpumask_pr_args(&cpus_chosen)); + preempt_disable(); for_each_cpu(cpu, &cpus_chosen) { if (cpu == testcpu) continue; @@ -402,6 +404,7 @@ void clocksource_verify_percpu(struct clocksource *cs) cs_nsec_min = cs_nsec; } preempt_enable(); + migrate_enable(); cpus_read_unlock(); if (!cpumask_empty(&cpus_ahead)) pr_warn(" CPUs %*pbl ahead of CPU %d for clocksource %s.\n", diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index f6d8df94045c..deb1aa32814e 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -58,6 +58,8 @@ #define HRTIMER_ACTIVE_SOFT (HRTIMER_ACTIVE_HARD << MASK_SHIFT) #define HRTIMER_ACTIVE_ALL (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD) +static void retrigger_next_event(void *arg); + /* * The timer bases: * @@ -111,7 +113,8 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .clockid = CLOCK_TAI, .get_time = &ktime_get_clocktai, }, - } + }, + .csd = CSD_INIT(retrigger_next_event, NULL) }; static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { @@ -124,6 +127,14 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { [CLOCK_TAI] = HRTIMER_BASE_TAI, }; +static inline bool hrtimer_base_is_online(struct hrtimer_cpu_base *base) +{ + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) + return true; + else + return likely(base->online); +} + /* * Functions and macros which are different for UP/SMP systems are kept in a * single place @@ -145,11 +156,6 @@ static struct hrtimer_cpu_base migration_cpu_base = { #define migration_base migration_cpu_base.clock_base[0] -static inline bool is_migration_base(struct hrtimer_clock_base *base) -{ - return base == &migration_base; -} - /* * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock * means that all timers which are tied to this base via timer->base are @@ -183,27 +189,54 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, } /* - * We do not migrate the timer when it is expiring before the next - * event on the target cpu. When high resolution is enabled, we cannot - * reprogram the target cpu hardware and we would cause it to fire - * late. To keep it simple, we handle the high resolution enabled and - * disabled case similar. + * Check if the elected target is suitable considering its next + * event and the hotplug state of the current CPU. + * + * If the elected target is remote and its next event is after the timer + * to queue, then a remote reprogram is necessary. However there is no + * guarantee the IPI handling the operation would arrive in time to meet + * the high resolution deadline. In this case the local CPU becomes a + * preferred target, unless it is offline. + * + * High and low resolution modes are handled the same way for simplicity. * * Called with cpu_base->lock of target cpu held. */ -static int -hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base) +static bool hrtimer_suitable_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base, + struct hrtimer_cpu_base *new_cpu_base, + struct hrtimer_cpu_base *this_cpu_base) { ktime_t expires; + /* + * The local CPU clockevent can be reprogrammed. Also get_target_base() + * guarantees it is online. + */ + if (new_cpu_base == this_cpu_base) + return true; + + /* + * The offline local CPU can't be the default target if the + * next remote target event is after this timer. Keep the + * elected new base. An IPI will we issued to reprogram + * it as a last resort. + */ + if (!hrtimer_base_is_online(this_cpu_base)) + return true; + expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset); - return expires < new_base->cpu_base->expires_next; + + return expires >= new_base->cpu_base->expires_next; } -static inline -struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, - int pinned) +static inline struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, int pinned) { + if (!hrtimer_base_is_online(base)) { + int cpu = cpumask_any_and(cpu_online_mask, housekeeping_cpumask(HK_TYPE_TIMER)); + + return &per_cpu(hrtimer_bases, cpu); + } + #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) if (static_branch_likely(&timers_migration_enabled) && !pinned) return &per_cpu(hrtimer_bases, get_nohz_timer_target()); @@ -254,8 +287,8 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, raw_spin_unlock(&base->cpu_base->lock); raw_spin_lock(&new_base->cpu_base->lock); - if (new_cpu_base != this_cpu_base && - hrtimer_check_target(timer, new_base)) { + if (!hrtimer_suitable_target(timer, new_base, new_cpu_base, + this_cpu_base)) { raw_spin_unlock(&new_base->cpu_base->lock); raw_spin_lock(&base->cpu_base->lock); new_cpu_base = this_cpu_base; @@ -264,8 +297,7 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, } WRITE_ONCE(timer->base, new_base); } else { - if (new_cpu_base != this_cpu_base && - hrtimer_check_target(timer, new_base)) { + if (!hrtimer_suitable_target(timer, new_base, new_cpu_base, this_cpu_base)) { new_cpu_base = this_cpu_base; goto again; } @@ -275,11 +307,6 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, #else /* CONFIG_SMP */ -static inline bool is_migration_base(struct hrtimer_clock_base *base) -{ - return false; -} - static inline struct hrtimer_clock_base * lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) __acquires(&timer->base->cpu_base->lock) @@ -716,8 +743,6 @@ static inline int hrtimer_is_hres_enabled(void) return hrtimer_hres_enabled; } -static void retrigger_next_event(void *arg); - /* * Switch to high resolution mode */ @@ -1205,6 +1230,7 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 delta_ns, const enum hrtimer_mode mode, struct hrtimer_clock_base *base) { + struct hrtimer_cpu_base *this_cpu_base = this_cpu_ptr(&hrtimer_bases); struct hrtimer_clock_base *new_base; bool force_local, first; @@ -1216,9 +1242,15 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, * and enforce reprogramming after it is queued no matter whether * it is the new first expiring timer again or not. */ - force_local = base->cpu_base == this_cpu_ptr(&hrtimer_bases); + force_local = base->cpu_base == this_cpu_base; force_local &= base->cpu_base->next_timer == timer; + /* + * Don't force local queuing if this enqueue happens on a unplugged + * CPU after hrtimer_cpu_dying() has been invoked. + */ + force_local &= this_cpu_base->online; + /* * Remove an active timer from the queue. In case it is not queued * on the current CPU, make sure that remove_hrtimer() updates the @@ -1248,8 +1280,27 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, } first = enqueue_hrtimer(timer, new_base, mode); - if (!force_local) - return first; + if (!force_local) { + /* + * If the current CPU base is online, then the timer is + * never queued on a remote CPU if it would be the first + * expiring timer there. + */ + if (hrtimer_base_is_online(this_cpu_base)) + return first; + + /* + * Timer was enqueued remote because the current base is + * already offline. If the timer is the first to expire, + * kick the remote CPU to reprogram the clock event. + */ + if (first) { + struct hrtimer_cpu_base *new_cpu_base = new_base->cpu_base; + + smp_call_function_single_async(new_cpu_base->cpu, &new_cpu_base->csd); + } + return 0; + } /* * Timer was forced to stay on the current CPU to avoid @@ -1370,6 +1421,18 @@ static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base, } } +#ifdef CONFIG_SMP +static __always_inline bool is_migration_base(struct hrtimer_clock_base *base) +{ + return base == &migration_base; +} +#else +static __always_inline bool is_migration_base(struct hrtimer_clock_base *base) +{ + return false; +} +#endif + /* * This function is called on PREEMPT_RT kernels when the fast path * deletion of a timer failed because the timer callback function was diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c index 9cb9b6584ea1..2f6330831f08 100644 --- a/kernel/time/timer_migration.c +++ b/kernel/time/timer_migration.c @@ -1675,6 +1675,9 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node) } while (i < tmigr_hierarchy_levels); + /* Assert single root */ + WARN_ON_ONCE(!err && !group->parent && !list_is_singular(&tmigr_level_list[top])); + while (i > 0) { group = stack[--i]; @@ -1716,7 +1719,12 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node) WARN_ON_ONCE(top == 0); lvllist = &tmigr_level_list[top]; - if (group->num_children == 1 && list_is_singular(lvllist)) { + + /* + * Newly created root level should have accounted the upcoming + * CPU's child group and pre-accounted the old root. + */ + if (group->num_children == 2 && list_is_singular(lvllist)) { /* * The target CPU must never do the prepare work, except * on early boot when the boot CPU is the target. Otherwise diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 54d850997c0a..136c750b0b4d 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -198,7 +198,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace, * returning from the function. */ if (ftrace_graph_notrace_addr(trace->func)) { - *task_var |= TRACE_GRAPH_NOTRACE_BIT; + *task_var |= TRACE_GRAPH_NOTRACE; /* * Need to return 1 to have the return called * that will clear the NOTRACE bit. diff --git a/lib/stackinit_kunit.c b/lib/stackinit_kunit.c index fbe910c9c825..135322592faf 100644 --- a/lib/stackinit_kunit.c +++ b/lib/stackinit_kunit.c @@ -75,8 +75,10 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, */ #ifdef CONFIG_M68K #define FILL_SIZE_STRING 8 +#define FILL_SIZE_ARRAY 2 #else #define FILL_SIZE_STRING 16 +#define FILL_SIZE_ARRAY 8 #endif #define INIT_CLONE_SCALAR /**/ @@ -345,11 +347,11 @@ union test_small_start { short three; unsigned long four; struct big_struct { - unsigned long array[8]; + unsigned long array[FILL_SIZE_ARRAY]; } big; }; -/* Mismatched sizes, with one and two being small */ +/* Mismatched sizes, with three and four being small */ union test_small_end { short one; unsigned long two; diff --git a/net/core/dev.c b/net/core/dev.c index c0021cbd28fc..b91658e8aedb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11286,6 +11286,20 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, const struct net_device_ops *ops = dev->netdev_ops; const struct net_device_core_stats __percpu *p; + /* + * IPv{4,6} and udp tunnels share common stat helpers and use + * different stat type (NETDEV_PCPU_STAT_TSTATS vs + * NETDEV_PCPU_STAT_DSTATS). Ensure the accounting is consistent. + */ + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, rx_bytes) != + offsetof(struct pcpu_dstats, rx_bytes)); + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, rx_packets) != + offsetof(struct pcpu_dstats, rx_packets)); + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, tx_bytes) != + offsetof(struct pcpu_dstats, tx_bytes)); + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, tx_packets) != + offsetof(struct pcpu_dstats, tx_packets)); + if (ops->ndo_get_stats64) { memset(storage, 0, sizeof(*storage)); ops->ndo_get_stats64(dev, storage); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 34bee42e1247..7609ce2b2c5e 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -993,7 +993,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, return rc; /* Nonzero ring with RSS only makes sense if NIC adds them together */ - if (cmd == ETHTOOL_SRXCLSRLINS && info.flow_type & FLOW_RSS && + if (cmd == ETHTOOL_SRXCLSRLINS && info.fs.flow_type & FLOW_RSS && !ops->cap_rss_rxnfc_adds && ethtool_get_flow_spec_ring(info.fs.ring_cookie)) return -EINVAL; diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index 7cb106b590ab..58df9ad02ce8 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -107,6 +107,8 @@ rss_prepare_ctx(const struct rss_req_info *request, struct net_device *dev, u32 total_size, indir_bytes; u8 *rss_config; + data->no_key_fields = !dev->ethtool_ops->rxfh_per_ctx_key; + ctx = xa_load(&dev->ethtool->rss_ctx, request->rss_context); if (!ctx) return -ENOENT; @@ -153,7 +155,6 @@ rss_prepare_data(const struct ethnl_req_info *req_base, if (!ops->cap_rss_ctx_supported && !ops->create_rxfh_context) return -EOPNOTSUPP; - data->no_key_fields = !ops->rxfh_per_ctx_key; return rss_prepare_ctx(request, dev, data, info); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c472c9a57cf6..a9bb9ce5438e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1141,9 +1141,9 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, const int hlen = skb_network_header_len(skb) + sizeof(struct udphdr); - if (hlen + cork->gso_size > cork->fragsize) { + if (hlen + min(datalen, cork->gso_size) > cork->fragsize) { kfree_skb(skb); - return -EINVAL; + return -EMSGSIZE; } if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index 28e5a89dc255..2c383c12a431 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -336,7 +336,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb, static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb), *cache_dst; + struct dst_entry *dst = skb_dst(skb), *cache_dst = NULL; struct in6_addr orig_daddr; struct ioam6_lwt *ilwt; int err = -EINVAL; @@ -407,13 +407,15 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) cache_dst = ip6_route_output(net, NULL, &fl6); if (cache_dst->error) { err = cache_dst->error; - dst_release(cache_dst); goto drop; } - local_bh_disable(); - dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr); - local_bh_enable(); + /* cache only if we don't create a dst reference loop */ + if (dst->lwtstate != cache_dst->lwtstate) { + local_bh_disable(); + dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr); + local_bh_enable(); + } err = skb_cow_head(skb, LL_RESERVED_SPACE(cache_dst->dev)); if (unlikely(err)) @@ -426,8 +428,10 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) return dst_output(net, sk, skb); } out: + dst_release(cache_dst); return dst->lwtstate->orig_output(net, sk, skb); drop: + dst_release(cache_dst); kfree_skb(skb); return err; } diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c index 7ba22d2f2bfe..0ac4283acdf2 100644 --- a/net/ipv6/rpl_iptunnel.c +++ b/net/ipv6/rpl_iptunnel.c @@ -232,13 +232,15 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { err = dst->error; - dst_release(dst); goto drop; } - local_bh_disable(); - dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr); - local_bh_enable(); + /* cache only if we don't create a dst reference loop */ + if (orig_dst->lwtstate != dst->lwtstate) { + local_bh_disable(); + dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr); + local_bh_enable(); + } err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); if (unlikely(err)) @@ -251,6 +253,7 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) return dst_output(net, sk, skb); drop: + dst_release(dst); kfree_skb(skb); return err; } @@ -269,8 +272,10 @@ static int rpl_input(struct sk_buff *skb) local_bh_enable(); err = rpl_do_srh(skb, rlwt, dst); - if (unlikely(err)) + if (unlikely(err)) { + dst_release(dst); goto drop; + } if (!dst) { ip6_route_input(skb); diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 4bf937bfc263..33833b2064c0 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -482,8 +482,10 @@ static int seg6_input_core(struct net *net, struct sock *sk, local_bh_enable(); err = seg6_do_srh(skb, dst); - if (unlikely(err)) + if (unlikely(err)) { + dst_release(dst); goto drop; + } if (!dst) { ip6_route_input(skb); @@ -571,13 +573,15 @@ static int seg6_output_core(struct net *net, struct sock *sk, dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { err = dst->error; - dst_release(dst); goto drop; } - local_bh_disable(); - dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); - local_bh_enable(); + /* cache only if we don't create a dst reference loop */ + if (orig_dst->lwtstate != dst->lwtstate) { + local_bh_disable(); + dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); + local_bh_enable(); + } err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); if (unlikely(err)) @@ -593,6 +597,7 @@ static int seg6_output_core(struct net *net, struct sock *sk, return dst_output(net, sk, skb); drop: + dst_release(dst); kfree_skb(skb); return err; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 6671daa67f4f..c6ea438b5c75 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1389,9 +1389,9 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, const int hlen = skb_network_header_len(skb) + sizeof(struct udphdr); - if (hlen + cork->gso_size > cork->fragsize) { + if (hlen + min(datalen, cork->gso_size) > cork->fragsize) { kfree_skb(skb); - return -EINVAL; + return -EMSGSIZE; } if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 72c65d938a15..a4a668b88a8f 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -701,11 +701,9 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct net_device *dev; ax25_address *source; ax25_uid_assoc *user; + int err = -EINVAL; int n; - if (!sock_flag(sk, SOCK_ZAPPED)) - return -EINVAL; - if (addr_len != sizeof(struct sockaddr_rose) && addr_len != sizeof(struct full_sockaddr_rose)) return -EINVAL; @@ -718,8 +716,15 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS) return -EINVAL; - if ((dev = rose_dev_get(&addr->srose_addr)) == NULL) - return -EADDRNOTAVAIL; + lock_sock(sk); + + if (!sock_flag(sk, SOCK_ZAPPED)) + goto out_release; + + err = -EADDRNOTAVAIL; + dev = rose_dev_get(&addr->srose_addr); + if (!dev) + goto out_release; source = &addr->srose_call; @@ -730,7 +735,8 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) } else { if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) { dev_put(dev); - return -EACCES; + err = -EACCES; + goto out_release; } rose->source_call = *source; } @@ -753,8 +759,10 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) rose_insert_socket(sk); sock_reset_flag(sk, SOCK_ZAPPED); - - return 0; + err = 0; +out_release: + release_sock(sk); + return err; } static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 718193df9d2e..f251845fe532 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -582,6 +582,7 @@ enum rxrpc_call_flag { RXRPC_CALL_EXCLUSIVE, /* The call uses a once-only connection */ RXRPC_CALL_RX_IS_IDLE, /* recvmsg() is idle - send an ACK */ RXRPC_CALL_RECVMSG_READ_ALL, /* recvmsg() read all of the received data */ + RXRPC_CALL_CONN_CHALLENGING, /* The connection is being challenged */ }; /* @@ -602,7 +603,6 @@ enum rxrpc_call_state { RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */ RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */ RXRPC_CALL_SERVER_PREALLOC, /* - service preallocation */ - RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */ RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */ RXRPC_CALL_SERVER_ACK_REQUEST, /* - server pending ACK of request */ RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 5a543c3f6fb0..c4c8b46a68c6 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -22,7 +22,6 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl", [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl", [RXRPC_CALL_SERVER_PREALLOC] = "SvPrealc", - [RXRPC_CALL_SERVER_SECURING] = "SvSecure", [RXRPC_CALL_SERVER_RECV_REQUEST] = "SvRcvReq", [RXRPC_CALL_SERVER_ACK_REQUEST] = "SvAckReq", [RXRPC_CALL_SERVER_SEND_REPLY] = "SvSndRpl", @@ -453,17 +452,16 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, call->cong_tstamp = skb->tstamp; __set_bit(RXRPC_CALL_EXPOSED, &call->flags); - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING); + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); spin_lock(&conn->state_lock); switch (conn->state) { case RXRPC_CONN_SERVICE_UNSECURED: case RXRPC_CONN_SERVICE_CHALLENGING: - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING); + __set_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags); break; case RXRPC_CONN_SERVICE: - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); break; case RXRPC_CONN_ABORTED: diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 713e04394ceb..4d9c5e21ba78 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -228,10 +228,8 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn) */ static void rxrpc_call_is_secure(struct rxrpc_call *call) { - if (call && __rxrpc_call_state(call) == RXRPC_CALL_SERVER_SECURING) { - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); + if (call && __test_and_clear_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags)) rxrpc_notify_socket(call); - } } /* @@ -272,6 +270,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, * we've already received the packet, put it on the * front of the queue. */ + sp->conn = rxrpc_get_connection(conn, rxrpc_conn_get_poke_secured); skb->mark = RXRPC_SKB_MARK_SERVICE_CONN_SECURED; rxrpc_get_skb(skb, rxrpc_skb_get_conn_secured); skb_queue_head(&conn->local->rx_queue, skb); @@ -437,14 +436,16 @@ void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb) if (test_and_clear_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events)) rxrpc_abort_calls(conn); - switch (skb->mark) { - case RXRPC_SKB_MARK_SERVICE_CONN_SECURED: - if (conn->state != RXRPC_CONN_SERVICE) - break; + if (skb) { + switch (skb->mark) { + case RXRPC_SKB_MARK_SERVICE_CONN_SECURED: + if (conn->state != RXRPC_CONN_SERVICE) + break; - for (loop = 0; loop < RXRPC_MAXCALLS; loop++) - rxrpc_call_is_secure(conn->channels[loop].call); - break; + for (loop = 0; loop < RXRPC_MAXCALLS; loop++) + rxrpc_call_is_secure(conn->channels[loop].call); + break; + } } /* Process delayed ACKs whose time has come. */ diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 7eba4d7d9a38..2f1fd1e2e7e4 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -67,6 +67,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet, INIT_WORK(&conn->destructor, rxrpc_clean_up_connection); INIT_LIST_HEAD(&conn->proc_link); INIT_LIST_HEAD(&conn->link); + INIT_LIST_HEAD(&conn->attend_link); mutex_init(&conn->security_lock); mutex_init(&conn->tx_data_alloc_lock); skb_queue_head_init(&conn->rx_queue); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 4974b5accafa..9047ba13bd31 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -448,11 +448,19 @@ static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); bool last = sp->hdr.flags & RXRPC_LAST_PACKET; - skb_queue_tail(&call->recvmsg_queue, skb); + spin_lock_irq(&call->recvmsg_queue.lock); + + __skb_queue_tail(&call->recvmsg_queue, skb); rxrpc_input_update_ack_window(call, window, wtop); trace_rxrpc_receive(call, last ? why + 1 : why, sp->hdr.serial, sp->hdr.seq); if (last) + /* Change the state inside the lock so that recvmsg syncs + * correctly with it and using sendmsg() to send a reply + * doesn't race. + */ rxrpc_end_rx_phase(call, sp->hdr.serial); + + spin_unlock_irq(&call->recvmsg_queue.lock); } /* @@ -657,7 +665,7 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb rxrpc_propose_delay_ACK(call, sp->hdr.serial, rxrpc_propose_ack_input_data); } - if (notify) { + if (notify && !test_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags)) { trace_rxrpc_notify_socket(call->debug_id, sp->hdr.serial); rxrpc_notify_socket(call); } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 0e8da909d4f2..584397aba4a0 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -707,7 +707,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) } else { switch (rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_AWAIT_CONN: - case RXRPC_CALL_SERVER_SECURING: + case RXRPC_CALL_SERVER_RECV_REQUEST: if (p.command == RXRPC_CMD_SEND_ABORT) break; fallthrough; diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index b50b2c2cc09b..e6bfd39ff339 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -40,6 +40,9 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, { unsigned int prev_backlog; + if (unlikely(READ_ONCE(sch->limit) == 0)) + return qdisc_drop(skb, sch, to_free); + if (likely(sch->q.qlen < READ_ONCE(sch->limit))) return qdisc_enqueue_tail(skb, sch); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 71ec9986ed37..fdd79d3ccd8c 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -749,9 +749,9 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) if (err != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(err)) qdisc_qstats_drop(sch); - qdisc_tree_reduce_backlog(sch, 1, pkt_len); sch->qstats.backlog -= pkt_len; sch->q.qlen--; + qdisc_tree_reduce_backlog(sch, 1, pkt_len); } goto tfifo_dequeue; } diff --git a/net/socket.c b/net/socket.c index 262a28b59c7f..28bae5a94234 100644 --- a/net/socket.c +++ b/net/socket.c @@ -479,6 +479,11 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) sock->file = file; file->private_data = sock; stream_open(SOCK_INODE(sock), file); + /* + * Disable permission and pre-content events, but enable legacy + * inotify events for legacy users. + */ + file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM); return file; } EXPORT_SYMBOL(sock_alloc_file); diff --git a/rust/Makefile b/rust/Makefile index 8fcfd60447bc..ea3849eb78f6 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -144,7 +144,7 @@ rusttestlib-kernel: private rustc_target_flags = --extern ffi \ --extern bindings --extern uapi rusttestlib-kernel: $(src)/kernel/lib.rs \ rusttestlib-bindings rusttestlib-uapi rusttestlib-build_error \ - $(obj)/libmacros.so $(obj)/bindings.o FORCE + $(obj)/$(libmacros_name) $(obj)/bindings.o FORCE +$(call if_changed,rustc_test_library) rusttestlib-bindings: private rustc_target_flags = --extern ffi @@ -240,6 +240,7 @@ bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \ -fzero-call-used-regs=% -fno-stack-clash-protection \ -fno-inline-functions-called-once -fsanitize=bounds-strict \ -fstrict-flex-arrays=% -fmin-function-alignment=% \ + -fzero-init-padding-bits=% \ --param=% --param asan-% # Derived from `scripts/Makefile.clang`. @@ -331,7 +332,7 @@ $(obj)/bindings/bindings_helpers_generated.rs: private bindgen_target_extra = ; $(obj)/bindings/bindings_helpers_generated.rs: $(src)/helpers/helpers.c FORCE $(call if_changed_dep,bindgen) -rust_exports = $(NM) -p --defined-only $(1) | awk '$$2~/(T|R|D|B)/ && $$3!~/__cfi/ { printf $(2),$$3 }' +rust_exports = $(NM) -p --defined-only $(1) | awk '$$2~/(T|R|D|B)/ && $$3!~/__cfi/ && $$3!~/__odr_asan/ { printf $(2),$$3 }' quiet_cmd_exports = EXPORTS $@ cmd_exports = \ diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 3f9236c1c9d5..7fd1ea8265a5 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -870,7 +870,7 @@ pub unsafe trait PinInit<T: ?Sized, E = Infallible>: Sized { /// use kernel::{types::Opaque, init::pin_init_from_closure}; /// #[repr(C)] /// struct RawFoo([u8; 16]); - /// extern { + /// extern "C" { /// fn init_foo(_: *mut RawFoo); /// } /// diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index eb719f6d8d53..dc081cf46d21 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -31,6 +31,11 @@ KBUILD_CFLAGS-$(CONFIG_CC_NO_ARRAY_BOUNDS) += -Wno-array-bounds ifdef CONFIG_CC_IS_CLANG # The kernel builds with '-std=gnu11' so use of GNU extensions is acceptable. KBUILD_CFLAGS += -Wno-gnu + +# Clang checks for overflow/truncation with '%p', while GCC does not: +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111219 +KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow-non-kprintf) +KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation-non-kprintf) else # gcc inanely warns about local variables called 'main' @@ -105,11 +110,6 @@ KBUILD_CFLAGS += $(call cc-disable-warning, packed-not-aligned) KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow) ifdef CONFIG_CC_IS_GCC KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation) -else -# Clang checks for overflow/truncation with '%p', while GCC does not: -# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111219 -KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow-non-kprintf) -KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation-non-kprintf) endif KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation) @@ -133,7 +133,6 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast) KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access) KBUILD_CFLAGS += -Wno-enum-compare-conditional -KBUILD_CFLAGS += -Wno-enum-enum-conversion endif endif @@ -157,6 +156,10 @@ KBUILD_CFLAGS += -Wno-missing-field-initializers KBUILD_CFLAGS += -Wno-type-limits KBUILD_CFLAGS += -Wno-shift-negative-value +ifdef CONFIG_CC_IS_CLANG +KBUILD_CFLAGS += -Wno-enum-enum-conversion +endif + ifdef CONFIG_CC_IS_GCC KBUILD_CFLAGS += -Wno-maybe-uninitialized endif diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index ad55ef201aac..cad20f0e66ee 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -305,7 +305,7 @@ endef # These are shared by some Makefile.* files. ifdef CONFIG_LTO_CLANG -# Run $(LD) here to covert LLVM IR to ELF in the following cases: +# Run $(LD) here to convert LLVM IR to ELF in the following cases: # - when this object needs objtool processing, as objtool cannot process LLVM IR # - when this is a single-object module, as modpost cannot process LLVM IR cmd_ld_single = $(if $(objtool-enabled)$(is-single-obj-m), ; $(LD) $(ld_flags) -r -o $(tmp-target) $@; mv $(tmp-target) $@) diff --git a/scripts/generate_rust_target.rs b/scripts/generate_rust_target.rs index 0d00ac3723b5..4fd6b6ab3e32 100644 --- a/scripts/generate_rust_target.rs +++ b/scripts/generate_rust_target.rs @@ -165,6 +165,18 @@ fn has(&self, option: &str) -> bool { let option = "CONFIG_".to_owned() + option; self.0.contains_key(&option) } + + /// Is the rustc version at least `major.minor.patch`? + fn rustc_version_atleast(&self, major: u32, minor: u32, patch: u32) -> bool { + let check_version = 100000 * major + 100 * minor + patch; + let actual_version = self + .0 + .get("CONFIG_RUSTC_VERSION") + .unwrap() + .parse::<u32>() + .unwrap(); + check_version <= actual_version + } } fn main() { @@ -182,6 +194,9 @@ fn main() { } } else if cfg.has("X86_64") { ts.push("arch", "x86_64"); + if cfg.rustc_version_atleast(1, 86, 0) { + ts.push("rustc-abi", "x86-softfloat"); + } ts.push( "data-layout", "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", @@ -215,6 +230,9 @@ fn main() { panic!("32-bit x86 only works under UML"); } ts.push("arch", "x86"); + if cfg.rustc_version_atleast(1, 86, 0) { + ts.push("rustc-abi", "x86-softfloat"); + } ts.push( "data-layout", "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:32-n8:16:32-S128", diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index e18ae7dc8140..36b28987a2f0 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -507,6 +507,9 @@ static int parse_elf(struct elf_info *info, const char *filename) info->modinfo_len = sechdrs[i].sh_size; } else if (!strcmp(secname, ".export_symbol")) { info->export_symbol_secndx = i; + } else if (!strcmp(secname, ".no_trim_symbol")) { + info->no_trim_symbol = (void *)hdr + sechdrs[i].sh_offset; + info->no_trim_symbol_len = sechdrs[i].sh_size; } if (sechdrs[i].sh_type == SHT_SYMTAB) { @@ -1566,6 +1569,14 @@ static void read_symbols(const char *modname) /* strip trailing .o */ mod = new_module(modname, strlen(modname) - strlen(".o")); + /* save .no_trim_symbol section for later use */ + if (info.no_trim_symbol_len) { + mod->no_trim_symbol = xmalloc(info.no_trim_symbol_len); + memcpy(mod->no_trim_symbol, info.no_trim_symbol, + info.no_trim_symbol_len); + mod->no_trim_symbol_len = info.no_trim_symbol_len; + } + if (!mod->is_vmlinux) { license = get_modinfo(&info, "license"); if (!license) @@ -1728,6 +1739,28 @@ static void handle_white_list_exports(const char *white_list) free(buf); } +/* + * Keep symbols recorded in the .no_trim_symbol section. This is necessary to + * prevent CONFIG_TRIM_UNUSED_KSYMS from dropping EXPORT_SYMBOL because + * symbol_get() relies on the symbol being present in the ksymtab for lookups. + */ +static void keep_no_trim_symbols(struct module *mod) +{ + unsigned long size = mod->no_trim_symbol_len; + + for (char *s = mod->no_trim_symbol; s; s = next_string(s , &size)) { + struct symbol *sym; + + /* + * If find_symbol() returns NULL, this symbol is not provided + * by any module, and symbol_get() will fail. + */ + sym = find_symbol(s); + if (sym) + sym->used = true; + } +} + static void check_modname_len(struct module *mod) { const char *mod_name; @@ -2254,6 +2287,8 @@ int main(int argc, char **argv) read_symbols_from_files(files_source); list_for_each_entry(mod, &modules, list) { + keep_no_trim_symbols(mod); + if (mod->dump_file || mod->is_vmlinux) continue; diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h index ffd0a52a606e..59366f456b76 100644 --- a/scripts/mod/modpost.h +++ b/scripts/mod/modpost.h @@ -111,6 +111,8 @@ struct module_alias { * * @dump_file: path to the .symvers file if loaded from a file * @aliases: list head for module_aliases + * @no_trim_symbol: .no_trim_symbol section data + * @no_trim_symbol_len: length of the .no_trim_symbol section */ struct module { struct list_head list; @@ -128,6 +130,8 @@ struct module { // Actual imported namespaces struct list_head imported_namespaces; struct list_head aliases; + char *no_trim_symbol; + unsigned int no_trim_symbol_len; char name[]; }; @@ -141,6 +145,8 @@ struct elf_info { char *strtab; char *modinfo; unsigned int modinfo_len; + char *no_trim_symbol; + unsigned int no_trim_symbol_len; /* support for 32bit section numbers */ diff --git a/scripts/module.lds.S b/scripts/module.lds.S index c2f80f9141d4..450f1088d5fd 100644 --- a/scripts/module.lds.S +++ b/scripts/module.lds.S @@ -16,6 +16,7 @@ SECTIONS { *(.discard) *(.discard.*) *(.export_symbol) + *(.no_trim_symbol) } __ksymtab 0 : ALIGN(8) { *(SORT(___ksymtab+*)) } diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build index bb6e23c1174e..b724626ea0ca 100755 --- a/scripts/package/install-extmod-build +++ b/scripts/package/install-extmod-build @@ -63,7 +63,7 @@ if [ "${CC}" != "${HOSTCC}" ]; then # Clear VPATH and srcroot because the source files reside in the output # directory. # shellcheck disable=SC2016 # $(MAKE), $(CC), and $(build) will be expanded by Make - "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC=$(CC) VPATH= srcroot=. $(build)='"${destdir}"/scripts + "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC="$(CC)" VPATH= srcroot=. $(build)='"${destdir}"/scripts rm -f "${destdir}/scripts/Kbuild" fi diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index ca8a7edff3dd..319aaa004c40 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -252,6 +252,7 @@ def test_rss_queue_reconfigure(cfg, main_ctx=True): try: # this targets queue 4, which doesn't exist ntuple2 = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}") except CmdExitFailure: pass else: @@ -259,7 +260,13 @@ def test_rss_queue_reconfigure(cfg, main_ctx=True): # change the table to target queues 0 and 2 ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 1 0") # ntuple rule therefore targets queues 1 and 3 - ntuple2 = ethtool_create(cfg, "-N", flow) + try: + ntuple2 = ethtool_create(cfg, "-N", flow) + except CmdExitFailure: + ksft_pr("Driver does not support rss + queue offset") + return + + defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}") # should replace existing filter ksft_eq(ntuple, ntuple2) _send_traffic_check(cfg, port, ctx_ref, { 'target': (1, 3), diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c index 8eb6aa606a0d..f048042e53e9 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount_test.c +++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c @@ -26,13 +26,12 @@ static const char *const known_fs[] = { "hfsplus", "hostfs", "hpfs", "hugetlbfs", "ibmasmfs", "iomem", "ipathfs", "iso9660", "jffs2", "jfs", "minix", "mqueue", "msdos", "nfs", "nfs4", "nfsd", "nilfs2", "nsfs", "ntfs", "ntfs3", "ocfs2", - "ocfs2_dlmfs", "ocxlflash", "omfs", "openpromfs", "overlay", "pipefs", - "proc", "pstore", "pvfs2", "qnx4", "qnx6", "ramfs", - "resctrl", "romfs", "rootfs", "rpc_pipefs", "s390_hypfs", "secretmem", - "securityfs", "selinuxfs", "smackfs", "smb3", "sockfs", "spufs", - "squashfs", "sysfs", "sysv", "tmpfs", "tracefs", "ubifs", "udf", - "ufs", "v7", "vboxsf", "vfat", "virtiofs", "vxfs", "xenfs", "xfs", - "zonefs", NULL }; + "ocfs2_dlmfs", "omfs", "openpromfs", "overlay", "pipefs", "proc", + "pstore", "pvfs2", "qnx4", "qnx6", "ramfs", "resctrl", "romfs", + "rootfs", "rpc_pipefs", "s390_hypfs", "secretmem", "securityfs", + "selinuxfs", "smackfs", "smb3", "sockfs", "spufs", "squashfs", "sysfs", + "sysv", "tmpfs", "tracefs", "ubifs", "udf", "ufs", "v7", "vboxsf", + "vfat", "virtiofs", "vxfs", "xenfs", "xfs", "zonefs", NULL }; static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigned int flags) { @@ -383,6 +382,10 @@ static void test_statmount_mnt_point(void) return; } + if (!(sm->mask & STATMOUNT_MNT_POINT)) { + ksft_test_result_fail("missing STATMOUNT_MNT_POINT in mask\n"); + return; + } if (strcmp(sm->str + sm->mnt_point, "/") != 0) { ksft_test_result_fail("unexpected mount point: '%s' != '/'\n", sm->str + sm->mnt_point); @@ -408,6 +411,10 @@ static void test_statmount_mnt_root(void) strerror(errno)); return; } + if (!(sm->mask & STATMOUNT_MNT_ROOT)) { + ksft_test_result_fail("missing STATMOUNT_MNT_ROOT in mask\n"); + return; + } mnt_root = sm->str + sm->mnt_root; last_root = strrchr(mnt_root, '/'); if (last_root) @@ -437,6 +444,10 @@ static void test_statmount_fs_type(void) strerror(errno)); return; } + if (!(sm->mask & STATMOUNT_FS_TYPE)) { + ksft_test_result_fail("missing STATMOUNT_FS_TYPE in mask\n"); + return; + } fs_type = sm->str + sm->fs_type; for (s = known_fs; s != NULL; s++) { if (strcmp(fs_type, *s) == 0) @@ -464,6 +475,11 @@ static void test_statmount_mnt_opts(void) return; } + if (!(sm->mask & STATMOUNT_MNT_BASIC)) { + ksft_test_result_fail("missing STATMOUNT_MNT_BASIC in mask\n"); + return; + } + while (getline(&line, &len, f_mountinfo) != -1) { int i; char *p, *p2; @@ -514,7 +530,10 @@ static void test_statmount_mnt_opts(void) if (p2) *p2 = '\0'; - statmount_opts = sm->str + sm->mnt_opts; + if (sm->mask & STATMOUNT_MNT_OPTS) + statmount_opts = sm->str + sm->mnt_opts; + else + statmount_opts = ""; if (strcmp(statmount_opts, p) != 0) ksft_test_result_fail( "unexpected mount options: '%s' != '%s'\n", diff --git a/tools/testing/selftests/kvm/s390/cmma_test.c b/tools/testing/selftests/kvm/s390/cmma_test.c index e32dd59703a0..85cc8c18d6e7 100644 --- a/tools/testing/selftests/kvm/s390/cmma_test.c +++ b/tools/testing/selftests/kvm/s390/cmma_test.c @@ -444,7 +444,7 @@ static void assert_no_pages_cmma_dirty(struct kvm_vm *vm) ); } -static void test_get_inital_dirty(void) +static void test_get_initial_dirty(void) { struct kvm_vm *vm = create_vm_two_memslots(); struct kvm_vcpu *vcpu; @@ -651,7 +651,7 @@ struct testdef { } testlist[] = { { "migration mode and dirty tracking", test_migration_mode }, { "GET_CMMA_BITS: basic calls", test_get_cmma_basic }, - { "GET_CMMA_BITS: all pages are dirty initally", test_get_inital_dirty }, + { "GET_CMMA_BITS: all pages are dirty initially", test_get_initial_dirty }, { "GET_CMMA_BITS: holes are skipped", test_get_skip_holes }, }; diff --git a/tools/testing/selftests/kvm/s390/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c index 135ee22856cf..d265b34c54be 100644 --- a/tools/testing/selftests/kvm/s390/ucontrol_test.c +++ b/tools/testing/selftests/kvm/s390/ucontrol_test.c @@ -88,10 +88,6 @@ asm("test_skey_asm:\n" " ahi %r0,1\n" " st %r1,0(%r5,%r6)\n" - " iske %r1,%r6\n" - " ahi %r0,1\n" - " diag 0,0,0x44\n" - " sske %r1,%r6\n" " xgr %r1,%r1\n" " iske %r1,%r6\n" @@ -459,10 +455,14 @@ TEST_F(uc_kvm, uc_no_user_region) }; ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, ®ion)); - ASSERT_EQ(EINVAL, errno); + ASSERT_TRUE(errno == EEXIST || errno == EINVAL) + TH_LOG("errno %s (%i) not expected for ioctl KVM_SET_USER_MEMORY_REGION", + strerror(errno), errno); ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION2, ®ion2)); - ASSERT_EQ(EINVAL, errno); + ASSERT_TRUE(errno == EEXIST || errno == EINVAL) + TH_LOG("errno %s (%i) not expected for ioctl KVM_SET_USER_MEMORY_REGION2", + strerror(errno), errno); } TEST_F(uc_kvm, uc_map_unmap) @@ -596,7 +596,9 @@ TEST_F(uc_kvm, uc_skey) ASSERT_EQ(true, uc_handle_exit(self)); ASSERT_EQ(1, sync_regs->gprs[0]); - /* ISKE */ + /* SSKE + ISKE */ + sync_regs->gprs[1] = skeyvalue; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; ASSERT_EQ(0, uc_run_once(self)); /* @@ -608,21 +610,11 @@ TEST_F(uc_kvm, uc_skey) TEST_ASSERT_EQ(0, sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)); TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason); TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode); - TEST_REQUIRE(sie_block->ipa != 0xb229); + TEST_REQUIRE(sie_block->ipa != 0xb22b); - /* ISKE contd. */ + /* SSKE + ISKE contd. */ ASSERT_EQ(false, uc_handle_exit(self)); ASSERT_EQ(2, sync_regs->gprs[0]); - /* assert initial skey (ACC = 0, R & C = 1) */ - ASSERT_EQ(0x06, sync_regs->gprs[1]); - uc_assert_diag44(self); - - /* SSKE + ISKE */ - sync_regs->gprs[1] = skeyvalue; - run->kvm_dirty_regs |= KVM_SYNC_GPRS; - ASSERT_EQ(0, uc_run_once(self)); - ASSERT_EQ(false, uc_handle_exit(self)); - ASSERT_EQ(3, sync_regs->gprs[0]); ASSERT_EQ(skeyvalue, sync_regs->gprs[1]); uc_assert_diag44(self); @@ -631,7 +623,7 @@ TEST_F(uc_kvm, uc_skey) run->kvm_dirty_regs |= KVM_SYNC_GPRS; ASSERT_EQ(0, uc_run_once(self)); ASSERT_EQ(false, uc_handle_exit(self)); - ASSERT_EQ(4, sync_regs->gprs[0]); + ASSERT_EQ(3, sync_regs->gprs[0]); /* assert R reset but rest of skey unchanged */ ASSERT_EQ(skeyvalue & 0xfa, sync_regs->gprs[1]); ASSERT_EQ(0, sync_regs->gprs[1] & 0x04); diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh index e5d06fb40233..15601402dee6 100644 --- a/tools/testing/selftests/livepatch/functions.sh +++ b/tools/testing/selftests/livepatch/functions.sh @@ -306,7 +306,8 @@ function check_result { result=$(dmesg | awk -v last_dmesg="$LAST_DMESG" 'p; $0 == last_dmesg { p=1 }' | \ grep -e 'livepatch:' -e 'test_klp' | \ grep -v '\(tainting\|taints\) kernel' | \ - sed 's/^\[[ 0-9.]*\] //') + sed 's/^\[[ 0-9.]*\] //' | \ + sed 's/^\[[ ]*[CT][0-9]*\] //') if [[ "$expect" == "$result" ]] ; then echo "ok" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 414addef9a45..d240d02fa443 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -1302,7 +1302,7 @@ int main_loop(void) return ret; if (cfg_truncate > 0) { - xdisconnect(fd); + shutdown(fd, SHUT_WR); } else if (--cfg_repeat > 0) { xdisconnect(fd); diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index 3f2fca02fec5..36ff28af4b19 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -102,6 +102,19 @@ struct testcase testcases_v4[] = { .gso_len = CONST_MSS_V4, .r_num_mss = 1, }, + { + /* datalen <= MSS < gso_len: will fall back to no GSO */ + .tlen = CONST_MSS_V4, + .gso_len = CONST_MSS_V4 + 1, + .r_num_mss = 0, + .r_len_last = CONST_MSS_V4, + }, + { + /* MSS < datalen < gso_len: fail */ + .tlen = CONST_MSS_V4 + 1, + .gso_len = CONST_MSS_V4 + 2, + .tfail = true, + }, { /* send a single MSS + 1B */ .tlen = CONST_MSS_V4 + 1, @@ -205,6 +218,19 @@ struct testcase testcases_v6[] = { .gso_len = CONST_MSS_V6, .r_num_mss = 1, }, + { + /* datalen <= MSS < gso_len: will fall back to no GSO */ + .tlen = CONST_MSS_V6, + .gso_len = CONST_MSS_V6 + 1, + .r_num_mss = 0, + .r_len_last = CONST_MSS_V6, + }, + { + /* MSS < datalen < gso_len: fail */ + .tlen = CONST_MSS_V6 + 1, + .gso_len = CONST_MSS_V6 + 2, + .tfail = true + }, { /* send a single MSS + 1B */ .tlen = CONST_MSS_V6 + 1, diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 8c3a73461475..14ba51b52095 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -47,6 +47,7 @@ #include <linux/kcmp.h> #include <sys/resource.h> #include <sys/capability.h> +#include <linux/perf_event.h> #include <unistd.h> #include <sys/syscall.h> @@ -68,6 +69,10 @@ # define PR_SET_PTRACER 0x59616d61 #endif +#ifndef noinline +#define noinline __attribute__((noinline)) +#endif + #ifndef PR_SET_NO_NEW_PRIVS #define PR_SET_NO_NEW_PRIVS 38 #define PR_GET_NO_NEW_PRIVS 39 @@ -4888,6 +4893,200 @@ TEST(tsync_vs_dead_thread_leader) EXPECT_EQ(0, status); } +noinline int probed(void) +{ + return 1; +} + +static int parse_uint_from_file(const char *file, const char *fmt) +{ + int err = -1, ret; + FILE *f; + + f = fopen(file, "re"); + if (f) { + err = fscanf(f, fmt, &ret); + fclose(f); + } + return err == 1 ? ret : err; +} + +static int determine_uprobe_perf_type(void) +{ + const char *file = "/sys/bus/event_source/devices/uprobe/type"; + + return parse_uint_from_file(file, "%d\n"); +} + +static int determine_uprobe_retprobe_bit(void) +{ + const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe"; + + return parse_uint_from_file(file, "config:%d\n"); +} + +static ssize_t get_uprobe_offset(const void *addr) +{ + size_t start, base, end; + bool found = false; + char buf[256]; + FILE *f; + + f = fopen("/proc/self/maps", "r"); + if (!f) + return -1; + + while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &base) == 4) { + if (buf[2] == 'x' && (uintptr_t)addr >= start && (uintptr_t)addr < end) { + found = true; + break; + } + } + fclose(f); + return found ? (uintptr_t)addr - start + base : -1; +} + +FIXTURE(URETPROBE) { + int fd; +}; + +FIXTURE_VARIANT(URETPROBE) { + /* + * All of the URETPROBE behaviors can be tested with either + * uretprobe attached or not + */ + bool attach; +}; + +FIXTURE_VARIANT_ADD(URETPROBE, attached) { + .attach = true, +}; + +FIXTURE_VARIANT_ADD(URETPROBE, not_attached) { + .attach = false, +}; + +FIXTURE_SETUP(URETPROBE) +{ + const size_t attr_sz = sizeof(struct perf_event_attr); + struct perf_event_attr attr; + ssize_t offset; + int type, bit; + +#ifndef __NR_uretprobe + SKIP(return, "__NR_uretprobe syscall not defined"); +#endif + + if (!variant->attach) + return; + + memset(&attr, 0, attr_sz); + + type = determine_uprobe_perf_type(); + ASSERT_GE(type, 0); + bit = determine_uprobe_retprobe_bit(); + ASSERT_GE(bit, 0); + offset = get_uprobe_offset(probed); + ASSERT_GE(offset, 0); + + attr.config |= 1 << bit; + attr.size = attr_sz; + attr.type = type; + attr.config1 = ptr_to_u64("/proc/self/exe"); + attr.config2 = offset; + + self->fd = syscall(__NR_perf_event_open, &attr, + getpid() /* pid */, -1 /* cpu */, -1 /* group_fd */, + PERF_FLAG_FD_CLOEXEC); +} + +FIXTURE_TEARDOWN(URETPROBE) +{ + /* we could call close(self->fd), but we'd need extra filter for + * that and since we are calling _exit right away.. + */ +} + +static int run_probed_with_filter(struct sock_fprog *prog) +{ + if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) || + seccomp(SECCOMP_SET_MODE_FILTER, 0, prog)) { + return -1; + } + + probed(); + return 0; +} + +TEST_F(URETPROBE, uretprobe_default_allow) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + ASSERT_EQ(0, run_probed_with_filter(&prog)); +} + +TEST_F(URETPROBE, uretprobe_default_block) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit_group, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + ASSERT_EQ(0, run_probed_with_filter(&prog)); +} + +TEST_F(URETPROBE, uretprobe_block_uretprobe_syscall) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), +#ifdef __NR_uretprobe + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 0, 1), +#endif + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + ASSERT_EQ(0, run_probed_with_filter(&prog)); +} + +TEST_F(URETPROBE, uretprobe_default_block_with_uretprobe_syscall) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), +#ifdef __NR_uretprobe + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 2, 0), +#endif + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit_group, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + ASSERT_EQ(0, run_probed_with_filter(&prog)); +} + /* * TODO: * - expand NNP testing diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index d3dd65b05b5f..9044ac054167 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -94,5 +94,37 @@ "$TC qdisc del dev $DUMMY ingress", "$IP addr del 10.10.10.10/24 dev $DUMMY" ] - } + }, + { + "id": "a4b9", + "name": "Test class qlen notification", + "category": [ + "qdisc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: drr", + "$TC filter add dev $DUMMY parent 1: basic classid 1:1", + "$TC class add dev $DUMMY parent 1: classid 1:1 drr", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2: netem", + "$TC qdisc add dev $DUMMY parent 2: handle 3: drr", + "$TC filter add dev $DUMMY parent 3: basic action drop", + "$TC class add dev $DUMMY parent 3: classid 3:1 drr", + "$TC class del dev $DUMMY classid 1:1", + "$TC class add dev $DUMMY parent 1: classid 1:1 drr" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1", + "expExitCode": "1", + "verifyCmd": "$TC qdisc ls dev $DUMMY", + "matchPattern": "drr 1: root", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1: drr", + "$IP addr del 10.10.10.10/24 dev $DUMMY" + ] + } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json index ae3d286a32b2..6f20d033670d 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json @@ -313,6 +313,29 @@ "matchPattern": "qdisc bfifo 1: root", "matchCount": "0", "teardown": [ + ] + }, + { + "id": "d774", + "name": "Check pfifo_head_drop qdisc enqueue behaviour when limit == 0", + "category": [ + "qdisc", + "pfifo_head_drop" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: pfifo_head_drop limit 0", + "$IP link set dev $DUMMY up || true" + ], + "cmdUnderTest": "ping -c2 -W0.01 -I $DUMMY 10.10.10.1", + "expExitCode": "1", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "dropped 2", + "matchCount": "1", + "teardown": [ ] } ] diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index faf10671eed2..ba0327e2d0d3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1070,15 +1070,6 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname) return ret; } -/* - * Called after the VM is otherwise initialized, but just before adding it to - * the vm_list. - */ -int __weak kvm_arch_post_init_vm(struct kvm *kvm) -{ - return 0; -} - /* * Called just after removing the VM from the vm_list, but before doing any * other destruction. @@ -1199,10 +1190,6 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) if (r) goto out_err_no_debugfs; - r = kvm_arch_post_init_vm(kvm); - if (r) - goto out_err; - mutex_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); mutex_unlock(&kvm_lock); @@ -1212,8 +1199,6 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) return kvm; -out_err: - kvm_destroy_vm_debugfs(kvm); out_err_no_debugfs: kvm_coalesced_mmio_free(kvm); out_no_coalesced_mmio: @@ -1971,7 +1956,15 @@ static int kvm_set_memory_region(struct kvm *kvm, return -EINVAL; if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) return -EINVAL; - if ((mem->memory_size >> PAGE_SHIFT) > KVM_MEM_MAX_NR_PAGES) + + /* + * The size of userspace-defined memory regions is restricted in order + * to play nice with dirty bitmap operations, which are indexed with an + * "unsigned int". KVM's internal memory regions don't support dirty + * logging, and so are exempt. + */ + if (id < KVM_USER_MEM_SLOTS && + (mem->memory_size >> PAGE_SHIFT) > KVM_MEM_MAX_NR_PAGES) return -EINVAL; slots = __kvm_memslots(kvm, as_id);