diff options
author | Sebastian Sanchez <sebastian.sanchez@intel.com> | 2016-10-25 22:12:34 +0200 |
---|---|---|
committer | Doug Ledford <dledford@redhat.com> | 2016-11-15 22:37:27 +0100 |
commit | 8af8d2970ed98493a2db88dfcad88b0065e55e79 (patch) | |
tree | 57841ee0bdf580dc5c23fe6de9a52dfe20cf005d /drivers/infiniband/hw/hfi1/pio.h | |
parent | IB/hfi1: Get rid of divide in pio buffer allocator (diff) | |
download | linux-8af8d2970ed98493a2db88dfcad88b0065e55e79.tar.xz linux-8af8d2970ed98493a2db88dfcad88b0065e55e79.zip |
IB/hfi1: Optimize pio_buf and send_context structs
Both pio_buf and send_context structs have oversized
fields and have cachelines that can be optimized.
Reduce oversized fields for both structs.
Make sure pio_buf struct fits within a cacheline.
Move read-only fields to their own cacheline in
send_context struct.
All of this will avoid cacheline trading as the ring
progresses and pio buffers/send contexts are used.
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/infiniband/hw/hfi1/pio.h')
-rw-r--r-- | drivers/infiniband/hw/hfi1/pio.h | 29 |
1 files changed, 15 insertions, 14 deletions
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index 498b548055e0..867e5ffc3595 100644 --- a/drivers/infiniband/hw/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -83,43 +83,43 @@ struct pio_buf { void *arg; /* argument for cb */ void __iomem *start; /* buffer start address */ void __iomem *end; /* context end address */ - unsigned long size; /* context size, in bytes */ unsigned long sent_at; /* buffer is sent when <= free */ - u32 block_count; /* size of buffer, in blocks */ - u32 qw_written; /* QW written so far */ - u32 carry_bytes; /* number of valid bytes in carry */ union mix carry; /* pending unwritten bytes */ + u16 qw_written; /* QW written so far */ + u8 carry_bytes; /* number of valid bytes in carry */ }; /* cache line aligned pio buffer array */ union pio_shadow_ring { struct pio_buf pbuf; - u64 unused[16]; /* cache line spacer */ } ____cacheline_aligned; /* per-NUMA send context */ struct send_context { /* read-only after init */ struct hfi1_devdata *dd; /* device */ - void __iomem *base_addr; /* start of PIO memory */ union pio_shadow_ring *sr; /* shadow ring */ + void __iomem *base_addr; /* start of PIO memory */ + u32 __percpu *buffers_allocated;/* count of buffers allocated */ + u32 size; /* context size, in bytes */ - struct work_struct halt_work; /* halted context work queue entry */ - unsigned long flags; /* flags */ int node; /* context home node */ - int type; /* context type */ - u32 sw_index; /* software index number */ - u32 hw_context; /* hardware context number */ - u32 credits; /* number of blocks in context */ u32 sr_size; /* size of the shadow ring */ - u32 group; /* credit return group */ + u16 flags; /* flags */ + u8 type; /* context type */ + u8 sw_index; /* software index number */ + u8 hw_context; /* hardware context number */ + u8 group; /* credit return group */ + /* allocator fields */ spinlock_t alloc_lock ____cacheline_aligned_in_smp; u32 sr_head; /* shadow ring head */ unsigned long fill; /* official alloc count */ unsigned long alloc_free; /* copy of free (less cache thrash) */ - u32 __percpu *buffers_allocated;/* count of buffers allocated */ u32 fill_wrap; /* tracks fill within ring */ + u32 credits; /* number of blocks in context */ + /* adding a new field here would make it part of this cacheline */ + /* releaser fields */ spinlock_t release_lock ____cacheline_aligned_in_smp; u32 sr_tail; /* shadow ring tail */ @@ -131,6 +131,7 @@ struct send_context { u32 credit_intr_count; /* count of credit intr users */ u64 credit_ctrl; /* cache for credit control */ wait_queue_head_t halt_wait; /* wait until kernel sees interrupt */ + struct work_struct halt_work; /* halted context work queue entry */ }; /* send context flags */ |