Commit 9ca8e40c authored by Jack Steiner, committed by Linus Torvalds

GRU Driver V3: fixes to resolve code review comments

Fixes problems identified in a code review:
	- add comment with high-level description of the GRU
	- prepend "gru_" to all global names
	- delete unused function
	- couple of trivial bug fixes

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Jack Steiner <steiner@sgi.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 3d919e5f
...@@ -284,16 +284,6 @@ __opword(unsigned char opcode, unsigned char exopc, unsigned char xtype, ...@@ -284,16 +284,6 @@ __opword(unsigned char opcode, unsigned char exopc, unsigned char xtype,
(exopc << GRU_CB_EXOPC_SHFT); (exopc << GRU_CB_EXOPC_SHFT);
} }
/*
* Prefetch a cacheline. Fetch is unconditional. Must page fault if
* no valid TLB entry is found.
*
* Open question left by the author: should this use an actual "load"
* (as it does now) or a hardware prefetch instruction?
*/
static inline void gru_prefetch(void *p)
{
/* Volatile one-byte read through p; the loaded value is discarded. */
*(volatile char *)p;
}
/* /*
* Architecture specific intrinsics * Architecture specific intrinsics
*/ */
......
...@@ -112,6 +112,10 @@ static int gru_file_mmap(struct file *file, struct vm_area_struct *vma) ...@@ -112,6 +112,10 @@ static int gru_file_mmap(struct file *file, struct vm_area_struct *vma)
if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) != (VM_SHARED | VM_WRITE)) if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) != (VM_SHARED | VM_WRITE))
return -EPERM; return -EPERM;
if (vma->vm_start & (GRU_GSEG_PAGESIZE - 1) ||
vma->vm_end & (GRU_GSEG_PAGESIZE - 1))
return -EINVAL;
vma->vm_flags |= vma->vm_flags |=
(VM_IO | VM_DONTCOPY | VM_LOCKED | VM_DONTEXPAND | VM_PFNMAP | (VM_IO | VM_DONTCOPY | VM_LOCKED | VM_DONTEXPAND | VM_PFNMAP |
VM_RESERVED); VM_RESERVED);
...@@ -471,8 +475,8 @@ struct vm_operations_struct gru_vm_ops = { ...@@ -471,8 +475,8 @@ struct vm_operations_struct gru_vm_ops = {
module_init(gru_init); module_init(gru_init);
module_exit(gru_exit); module_exit(gru_exit);
module_param(options, ulong, 0644); module_param(gru_options, ulong, 0644);
MODULE_PARM_DESC(options, "Various debug options"); MODULE_PARM_DESC(gru_options, "Various debug options");
MODULE_AUTHOR("Silicon Graphics, Inc."); MODULE_AUTHOR("Silicon Graphics, Inc.");
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
......
...@@ -638,11 +638,11 @@ int gru_kservices_init(struct gru_state *gru) ...@@ -638,11 +638,11 @@ int gru_kservices_init(struct gru_state *gru)
cpus_possible = uv_blade_nr_possible_cpus(gru->gs_blade_id); cpus_possible = uv_blade_nr_possible_cpus(gru->gs_blade_id);
num = GRU_NUM_KERNEL_CBR * cpus_possible; num = GRU_NUM_KERNEL_CBR * cpus_possible;
cbr_map = reserve_gru_cb_resources(gru, GRU_CB_COUNT_TO_AU(num), NULL); cbr_map = gru_reserve_cb_resources(gru, GRU_CB_COUNT_TO_AU(num), NULL);
gru->gs_reserved_cbrs += num; gru->gs_reserved_cbrs += num;
num = GRU_NUM_KERNEL_DSR_BYTES * cpus_possible; num = GRU_NUM_KERNEL_DSR_BYTES * cpus_possible;
dsr_map = reserve_gru_ds_resources(gru, GRU_DS_BYTES_TO_AU(num), NULL); dsr_map = gru_reserve_ds_resources(gru, GRU_DS_BYTES_TO_AU(num), NULL);
gru->gs_reserved_dsr_bytes += num; gru->gs_reserved_dsr_bytes += num;
gru->gs_active_contexts++; gru->gs_active_contexts++;
...@@ -673,7 +673,7 @@ int gru_kservices_init(struct gru_state *gru) ...@@ -673,7 +673,7 @@ int gru_kservices_init(struct gru_state *gru)
} }
unlock_cch_handle(cch); unlock_cch_handle(cch);
if (options & GRU_QUICKLOOK) if (gru_options & GRU_QUICKLOOK)
quicktest(gru); quicktest(gru);
return 0; return 0;
} }
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#include "grutables.h" #include "grutables.h"
#include "gruhandles.h" #include "gruhandles.h"
unsigned long options __read_mostly; unsigned long gru_options __read_mostly;
static struct device_driver gru_driver = { static struct device_driver gru_driver = {
.name = "gru" .name = "gru"
...@@ -163,14 +163,14 @@ static unsigned long reserve_resources(unsigned long *p, int n, int mmax, ...@@ -163,14 +163,14 @@ static unsigned long reserve_resources(unsigned long *p, int n, int mmax,
return bits; return bits;
} }
unsigned long reserve_gru_cb_resources(struct gru_state *gru, int cbr_au_count, unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count,
char *cbmap) char *cbmap)
{ {
return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU, return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU,
cbmap); cbmap);
} }
unsigned long reserve_gru_ds_resources(struct gru_state *gru, int dsr_au_count, unsigned long gru_reserve_ds_resources(struct gru_state *gru, int dsr_au_count,
char *dsmap) char *dsmap)
{ {
return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU, return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU,
...@@ -182,10 +182,10 @@ static void reserve_gru_resources(struct gru_state *gru, ...@@ -182,10 +182,10 @@ static void reserve_gru_resources(struct gru_state *gru,
{ {
gru->gs_active_contexts++; gru->gs_active_contexts++;
gts->ts_cbr_map = gts->ts_cbr_map =
reserve_gru_cb_resources(gru, gts->ts_cbr_au_count, gru_reserve_cb_resources(gru, gts->ts_cbr_au_count,
gts->ts_cbr_idx); gts->ts_cbr_idx);
gts->ts_dsr_map = gts->ts_dsr_map =
reserve_gru_ds_resources(gru, gts->ts_dsr_au_count, NULL); gru_reserve_ds_resources(gru, gts->ts_dsr_au_count, NULL);
} }
static void free_gru_resources(struct gru_state *gru, static void free_gru_resources(struct gru_state *gru,
...@@ -416,6 +416,7 @@ static void gru_free_gru_context(struct gru_thread_state *gts) ...@@ -416,6 +416,7 @@ static void gru_free_gru_context(struct gru_thread_state *gts)
/* /*
* Prefetching cachelines help hardware performance. * Prefetching cachelines help hardware performance.
* (Strictly a performance enhancement. Not functionally required).
*/ */
static void prefetch_data(void *p, int num, int stride) static void prefetch_data(void *p, int num, int stride)
{ {
...@@ -746,6 +747,8 @@ static struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts) ...@@ -746,6 +747,8 @@ static struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts)
* gru_nopage * gru_nopage
* *
* Map the user's GRU segment * Map the user's GRU segment
*
* Note: GRU segments are always mmapped on GRU_GSEG_PAGESIZE boundaries.
*/ */
int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf) int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{ {
...@@ -757,6 +760,7 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ...@@ -757,6 +760,7 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
vma, vaddr, GSEG_BASE(vaddr)); vma, vaddr, GSEG_BASE(vaddr));
STAT(nopfn); STAT(nopfn);
/* The following check ensures vaddr is a valid address in the VMA */
gts = gru_find_thread_state(vma, TSID(vaddr, vma)); gts = gru_find_thread_state(vma, TSID(vaddr, vma));
if (!gts) if (!gts)
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
...@@ -775,7 +779,7 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ...@@ -775,7 +779,7 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
} }
if (!gts->ts_gru) { if (!gts->ts_gru) {
while (!gru_assign_gru_context(gts)) { if (!gru_assign_gru_context(gts)) {
mutex_unlock(&gts->ts_ctxlock); mutex_unlock(&gts->ts_ctxlock);
preempt_enable(); preempt_enable();
schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */ schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */
......
...@@ -122,7 +122,7 @@ static ssize_t statistics_write(struct file *file, const char __user *userbuf, ...@@ -122,7 +122,7 @@ static ssize_t statistics_write(struct file *file, const char __user *userbuf,
static int options_show(struct seq_file *s, void *p) static int options_show(struct seq_file *s, void *p)
{ {
seq_printf(s, "0x%lx\n", options); seq_printf(s, "0x%lx\n", gru_options);
return 0; return 0;
} }
...@@ -136,7 +136,7 @@ static ssize_t options_write(struct file *file, const char __user *userbuf, ...@@ -136,7 +136,7 @@ static ssize_t options_write(struct file *file, const char __user *userbuf,
(buf, userbuf, count < sizeof(buf) ? count : sizeof(buf))) (buf, userbuf, count < sizeof(buf) ? count : sizeof(buf)))
return -EFAULT; return -EFAULT;
if (!strict_strtoul(buf, 10, &val)) if (!strict_strtoul(buf, 10, &val))
options = val; gru_options = val;
return count; return count;
} }
......
...@@ -24,6 +24,70 @@ ...@@ -24,6 +24,70 @@
#define __GRUTABLES_H__ #define __GRUTABLES_H__
/* /*
* GRU Chiplet:
* The GRU is a user-addressable memory accelerator. It provides
* several forms of load, store, memset, bcopy instructions. In addition, it
* contains special instructions for AMOs, sending messages to message
* queues, etc.
*
* The GRU is an integral part of the node controller. It connects
* directly to the cpu socket. In its current implementation, there are 2
* GRU chiplets in the node controller on each blade (~node).
*
* The entire GRU memory space is fully coherent and cacheable by the cpus.
*
* Each GRU chiplet has a physical memory map that looks like the following:
*
* +-----------------+
* |/////////////////|
* |/////////////////|
* |/////////////////|
* |/////////////////|
* |/////////////////|
* |/////////////////|
* |/////////////////|
* |/////////////////|
* +-----------------+
* | system control |
* +-----------------+ _______ +-------------+
* |/////////////////| / | |
* |/////////////////| / | |
* |/////////////////| / | instructions|
* |/////////////////| / | |
* |/////////////////| / | |
* |/////////////////| / |-------------|
* |/////////////////| / | |
* +-----------------+ | |
* | context 15 | | data |
* +-----------------+ | |
* | ...... | \ | |
* +-----------------+ \____________ +-------------+
* | context 1 |
* +-----------------+
* | context 0 |
* +-----------------+
*
* Each of the "contexts" is a chunk of memory that can be mmaped into user
* space. The context consists of 2 parts:
*
* - an instruction space that can be directly accessed by the user
* to issue GRU instructions and to check instruction status.
*
* - a data area that acts as normal RAM.
*
* User instructions contain virtual addresses of data to be accessed by the
* GRU. The GRU contains a TLB that is used to convert these user virtual
* addresses to physical addresses.
*
* The "system control" area of the GRU chiplet is used by the kernel driver
* to manage user contexts and to perform functions such as TLB dropin and
* purging.
*
* One context may be reserved for the kernel and used for cross-partition
* communication. The GRU will also be used to asynchronously zero out
* large blocks of memory (not currently implemented).
*
*
* Tables: * Tables:
* *
* VDATA-VMA Data - Holds a few parameters. Head of linked list of * VDATA-VMA Data - Holds a few parameters. Head of linked list of
...@@ -190,14 +254,14 @@ struct gru_stats_s { ...@@ -190,14 +254,14 @@ struct gru_stats_s {
#define GRU_STEAL_DELAY ((HZ * 200) / 1000) #define GRU_STEAL_DELAY ((HZ * 200) / 1000)
#define STAT(id) do { \ #define STAT(id) do { \
if (options & OPT_STATS) \ if (gru_options & OPT_STATS) \
atomic_long_inc(&gru_stats.id); \ atomic_long_inc(&gru_stats.id); \
} while (0) } while (0)
#ifdef CONFIG_SGI_GRU_DEBUG #ifdef CONFIG_SGI_GRU_DEBUG
#define gru_dbg(dev, fmt, x...) \ #define gru_dbg(dev, fmt, x...) \
do { \ do { \
if (options & OPT_DPRINT) \ if (gru_options & OPT_DPRINT) \
dev_dbg(dev, "%s: " fmt, __func__, x); \ dev_dbg(dev, "%s: " fmt, __func__, x); \
} while (0) } while (0)
#else #else
...@@ -529,9 +593,9 @@ extern void gru_flush_all_tlb(struct gru_state *gru); ...@@ -529,9 +593,9 @@ extern void gru_flush_all_tlb(struct gru_state *gru);
extern int gru_proc_init(void); extern int gru_proc_init(void);
extern void gru_proc_exit(void); extern void gru_proc_exit(void);
extern unsigned long reserve_gru_cb_resources(struct gru_state *gru, extern unsigned long gru_reserve_cb_resources(struct gru_state *gru,
int cbr_au_count, char *cbmap); int cbr_au_count, char *cbmap);
extern unsigned long reserve_gru_ds_resources(struct gru_state *gru, extern unsigned long gru_reserve_ds_resources(struct gru_state *gru,
int dsr_au_count, char *dsmap); int dsr_au_count, char *dsmap);
extern int gru_fault(struct vm_area_struct *, struct vm_fault *vmf); extern int gru_fault(struct vm_area_struct *, struct vm_fault *vmf);
extern struct gru_mm_struct *gru_register_mmu_notifier(void); extern struct gru_mm_struct *gru_register_mmu_notifier(void);
...@@ -540,6 +604,6 @@ extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms); ...@@ -540,6 +604,6 @@ extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms);
extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start, extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
unsigned long len); unsigned long len);
extern unsigned long options; extern unsigned long gru_options;
#endif /* __GRUTABLES_H__ */ #endif /* __GRUTABLES_H__ */
...@@ -242,7 +242,9 @@ static void gru_invalidate_range_end(struct mmu_notifier *mn, ...@@ -242,7 +242,9 @@ static void gru_invalidate_range_end(struct mmu_notifier *mn,
struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct, struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
ms_notifier); ms_notifier);
atomic_dec(&gms->ms_range_active); /* ..._and_test() provides needed barrier */
(void)atomic_dec_and_test(&gms->ms_range_active);
wake_up_all(&gms->ms_wait_queue); wake_up_all(&gms->ms_wait_queue);
gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end); gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment