Commit 47143b09 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'drivers' of git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile

* 'drivers' of git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile:
  pci root complex: support for tile architecture
  drivers/net/tile/: on-chip network drivers for the tile architecture
  MAINTAINERS: add drivers/char/hvc_tile.c as maintained by tile
parents 0b9466cc f02cbbe6
...@@ -5839,6 +5839,8 @@ M: Chris Metcalf <cmetcalf@tilera.com> ...@@ -5839,6 +5839,8 @@ M: Chris Metcalf <cmetcalf@tilera.com>
W: http://www.tilera.com/scm/ W: http://www.tilera.com/scm/
S: Supported S: Supported
F: arch/tile/ F: arch/tile/
F: drivers/char/hvc_tile.c
F: drivers/net/tile/
TLAN NETWORK DRIVER TLAN NETWORK DRIVER
M: Samuel Chessman <chessman@tux.org> M: Samuel Chessman <chessman@tux.org>
......
...@@ -329,6 +329,18 @@ endmenu # Tilera-specific configuration ...@@ -329,6 +329,18 @@ endmenu # Tilera-specific configuration
menu "Bus options" menu "Bus options"
config PCI
bool "PCI support"
default y
select PCI_DOMAINS
---help---
Enable PCI root complex support, so PCIe endpoint devices can
be attached to the Tile chip. Many, but not all, PCI devices
are supported under Tilera's root complex driver.
config PCI_DOMAINS
bool
config NO_IOMEM config NO_IOMEM
def_bool !PCI def_bool !PCI
......
...@@ -137,4 +137,56 @@ static inline void finv_buffer(void *buffer, size_t size) ...@@ -137,4 +137,56 @@ static inline void finv_buffer(void *buffer, size_t size)
mb_incoherent(); mb_incoherent();
} }
/*
* Flush & invalidate a VA range that is homed remotely on a single core,
* waiting until the memory controller holds the flushed values.
*/
static inline void finv_buffer_remote(void *buffer, size_t size)
{
	/* Memory is striped across controllers in units of this many bytes. */
	const unsigned long stripe_bytes = 8192;
	char *const first = (char *)buffer;
	char *probe = first + size - 1;
	int touched = 0;

	/*
	 * Push the range out of the local L1/L2 and ask the home cache
	 * to flush and invalidate it as well.
	 */
	__finv_buffer(buffer, size);

	/*
	 * Wait until the home cache has acknowledged every
	 * flush-and-invalidate request.  The flushed data may not have
	 * reached the memory controller yet; the home cache is merely
	 * known to be working on it.
	 */
	__insn_mf();

	/*
	 * A load cannot complete until the flushes previously issued to
	 * the same memory controller have completed.  Because memory may
	 * be striped across controllers at 8KB granularity, read the last
	 * byte of the buffer and then the last byte of each preceding
	 * stripe, stopping once every controller has been hit or we have
	 * stepped in front of the buffer.
	 */
	while (touched < (1 << CHIP_LOG_NUM_MSHIMS()) && probe >= first) {
		/* Force a load instruction to issue. */
		*(volatile char *)probe;

		/* Back up one stripe, then move to that stripe's last byte. */
		probe = (char *)(((unsigned long)probe - stripe_bytes) |
				 (stripe_bytes - 1));
		++touched;
	}

	/* Wait for the loads (and thus the flushes) to have completed. */
	__insn_mf();
}
#endif /* _ASM_TILE_CACHEFLUSH_H */ #endif /* _ASM_TILE_CACHEFLUSH_H */
...@@ -55,9 +55,6 @@ extern void iounmap(volatile void __iomem *addr); ...@@ -55,9 +55,6 @@ extern void iounmap(volatile void __iomem *addr);
#define ioremap_writethrough(physaddr, size) ioremap(physaddr, size) #define ioremap_writethrough(physaddr, size) ioremap(physaddr, size)
#define ioremap_fullcache(physaddr, size) ioremap(physaddr, size) #define ioremap_fullcache(physaddr, size) ioremap(physaddr, size)
void __iomem *ioport_map(unsigned long port, unsigned int len);
extern inline void ioport_unmap(void __iomem *addr) {}
#define mmiowb() #define mmiowb()
/* Conversion between virtual and physical mappings. */ /* Conversion between virtual and physical mappings. */
...@@ -189,12 +186,22 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, ...@@ -189,12 +186,22 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src,
* we never run, uses them unconditionally. * we never run, uses them unconditionally.
*/ */
static inline int ioport_panic(void) static inline long ioport_panic(void)
{ {
panic("inb/outb and friends do not exist on tile"); panic("inb/outb and friends do not exist on tile");
return 0; return 0;
} }
/* Port I/O does not exist on tile, so mapping a port simply panics. */
static inline void __iomem *ioport_map(unsigned long port, unsigned int len)
{
	long never_reached = ioport_panic();

	return (void __iomem *)never_reached;
}
/* Counterpart of ioport_map(); equally unsupported, so it panics too. */
static inline void ioport_unmap(void __iomem *addr)
{
	(void)ioport_panic();
}
static inline u8 inb(unsigned long addr) static inline u8 inb(unsigned long addr)
{ {
return ioport_panic(); return ioport_panic();
......
/*
* Copyright 2010 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
#ifndef _ASM_TILE_PCI_BRIDGE_H
#define _ASM_TILE_PCI_BRIDGE_H
#include <linux/ioport.h>
#include <linux/pci.h>
struct device_node;
struct pci_controller;
/*
* pci_bus_io_base returns the memory address at which you can access
* the I/O space for PCI bus number `bus' (or NULL on error).
*/
extern void __iomem *pci_bus_io_base(unsigned int bus);
extern unsigned long pci_bus_io_base_phys(unsigned int bus);
extern unsigned long pci_bus_mem_base_phys(unsigned int bus);
/* Allocate a new PCI host bridge structure */
extern struct pci_controller *pcibios_alloc_controller(void);
/* Helper function for setting up resources */
extern void pci_init_resource(struct resource *res, unsigned long start,
unsigned long end, int flags, char *name);
/* Get the PCI host controller for a bus */
extern struct pci_controller *pci_bus_to_hose(int bus);
/*
* Structure of a PCI controller (host bridge)
*/
struct pci_controller {
int index; /* PCI domain number */
/* NOTE(review): presumably the root bus behind this host bridge -- confirm. */
struct pci_bus *root_bus;
/*
* NOTE(review): presumably the inclusive range of bus numbers owned by
* this controller -- confirm against the scan code.
*/
int first_busno;
int last_busno;
int hv_cfg_fd[2]; /* config{0,1} fds for this PCIe controller */
int hv_mem_fd; /* fd to Hypervisor for MMIO operations */
/* NOTE(review): presumably the config-space accessors used for this controller -- confirm. */
struct pci_ops *ops;
int irq_base; /* Base IRQ from the Hypervisor */
int plx_gen1; /* flag for PLX Gen 1 configuration */
/* Address ranges that are routed to this controller/bridge. */
struct resource mem_resources[3];
};
/* Return the host controller stored in a bus's sysdata pointer. */
static inline struct pci_controller *pci_bus_to_host(struct pci_bus *bus)
{
	struct pci_controller *hose = bus->sysdata;

	return hose;
}
extern void setup_indirect_pci_nomap(struct pci_controller *hose,
void __iomem *cfg_addr, void __iomem *cfg_data);
extern void setup_indirect_pci(struct pci_controller *hose,
u32 cfg_addr, u32 cfg_data);
extern void setup_grackle(struct pci_controller *hose);
extern unsigned char common_swizzle(struct pci_dev *, unsigned char *);
/*
* The following code swizzles for exactly one bridge. The routine
* common_swizzle below handles multiple bridges. But there are
* some boards that don't follow the PCI spec's suggestion so we
* break this piece out separately.
*/
static inline unsigned char bridge_swizzle(unsigned char pin,
					   unsigned char idsel)
{
	/* Work on the zero-based pin, rotate it by idsel across the 4 pins. */
	int rotated = (pin - 1 + idsel) % 4;

	/* Convert back to the one-based pin numbering. */
	return rotated + 1;
}
/*
* The following macro is used to lookup irqs in a standard table
* format for those PPC systems that do not already have PCI
* interrupts properly routed.
*/
/* FIXME - double check this */
/*
 * Evaluates to pci_irq_table[idsel - min_idsel][pin - 1], or to -1 when
 * idsel lies outside [min_idsel, max_idsel] or pin lies outside
 * [1, irqs_per_slot].
 *
 * The macro takes no arguments: idsel, pin, min_idsel, max_idsel,
 * irqs_per_slot and pci_irq_table must all be in scope where it is
 * expanded.  pin is one-based, so pin == 0 is rejected explicitly;
 * without that check it would index column -1 of the table.
 */
#define PCI_IRQ_TABLE_LOOKUP ({ \
	long _ctl_ = -1; \
	if (idsel >= min_idsel && idsel <= max_idsel && \
	    pin >= 1 && pin <= irqs_per_slot) \
		_ctl_ = pci_irq_table[idsel - min_idsel][pin-1]; \
	_ctl_; \
})
/*
* Scan the buses below a given PCI host bridge and assign suitable
* resources to all devices found.
*/
extern int pciauto_bus_scan(struct pci_controller *, int);
#ifdef CONFIG_PCI
extern unsigned long pci_address_to_pio(phys_addr_t address);
#else
static inline unsigned long pci_address_to_pio(phys_addr_t address)
{
	/* CONFIG_PCI is off: always report the all-ones value. */
	return ~0UL;
}
#endif
#endif /* _ASM_TILE_PCI_BRIDGE_H */
...@@ -15,7 +15,29 @@ ...@@ -15,7 +15,29 @@
#ifndef _ASM_TILE_PCI_H #ifndef _ASM_TILE_PCI_H
#define _ASM_TILE_PCI_H #define _ASM_TILE_PCI_H
#include <asm/pci-bridge.h> #include <linux/pci.h>
/*
* Structure of a PCI controller (host bridge)
*/
struct pci_controller {
int index; /* PCI domain number */
/* NOTE(review): presumably the root bus behind this host bridge -- confirm. */
struct pci_bus *root_bus;
/*
* NOTE(review): presumably the inclusive range of bus numbers owned by
* this controller -- confirm against the scan code.
*/
int first_busno;
int last_busno;
int hv_cfg_fd[2]; /* config{0,1} fds for this PCIe controller */
int hv_mem_fd; /* fd to Hypervisor for MMIO operations */
/* NOTE(review): presumably the config-space accessors used for this controller -- confirm. */
struct pci_ops *ops;
int irq_base; /* Base IRQ from the Hypervisor */
int plx_gen1; /* flag for PLX Gen 1 configuration */
/* Address ranges that are routed to this controller/bridge. */
struct resource mem_resources[3];
};
/* /*
* The hypervisor maps the entirety of CPA-space as bus addresses, so * The hypervisor maps the entirety of CPA-space as bus addresses, so
...@@ -24,56 +46,12 @@ ...@@ -24,56 +46,12 @@
*/ */
#define PCI_DMA_BUS_IS_PHYS 1 #define PCI_DMA_BUS_IS_PHYS 1
struct pci_controller *pci_bus_to_hose(int bus);
unsigned char __init common_swizzle(struct pci_dev *dev, unsigned char *pinp);
int __init tile_pci_init(void); int __init tile_pci_init(void);
void pci_iounmap(struct pci_dev *dev, void __iomem *addr);
void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
void __devinit pcibios_fixup_bus(struct pci_bus *bus);
int __devinit _tile_cfg_read(struct pci_controller *hose, void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
int bus, static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}
int slot,
int function,
int offset,
int size,
u32 *val);
int __devinit _tile_cfg_write(struct pci_controller *hose,
int bus,
int slot,
int function,
int offset,
int size,
u32 val);
/* void __devinit pcibios_fixup_bus(struct pci_bus *bus);
* These are used to to config reads and writes in the early stages of
* setup before the driver infrastructure has been set up enough to be
* able to do config reads and writes.
*/
#define early_cfg_read(where, size, value) \
_tile_cfg_read(controller, \
current_bus, \
pci_slot, \
pci_fn, \
where, \
size, \
value)
#define early_cfg_write(where, size, value) \
_tile_cfg_write(controller, \
current_bus, \
pci_slot, \
pci_fn, \
where, \
size, \
value)
#define PCICFG_BYTE 1
#define PCICFG_WORD 2
#define PCICFG_DWORD 4
#define TILE_NUM_PCIE 2 #define TILE_NUM_PCIE 2
...@@ -88,33 +66,33 @@ static inline int pci_proc_domain(struct pci_bus *bus) ...@@ -88,33 +66,33 @@ static inline int pci_proc_domain(struct pci_bus *bus)
} }
/* /*
* I/O space is currently not supported. * pcibios_assign_all_busses() tells whether or not the bus numbers
* should be reassigned, in case the BIOS didn't do it correctly, or
* in case we don't have a BIOS and we want to let Linux do it.
*/ */
static inline int pcibios_assign_all_busses(void)
{
	/* Unconditionally ask for bus numbers to be (re)assigned. */
	enum { REASSIGN_BUS_NUMBERS = 1 };

	return REASSIGN_BUS_NUMBERS;
}
#define TILE_PCIE_LOWER_IO 0x0 /*
#define TILE_PCIE_UPPER_IO 0x10000 * No special bus mastering setup handling.
#define TILE_PCIE_PCIE_IO_SIZE 0x0000FFFF */
#define _PAGE_NO_CACHE 0
#define _PAGE_GUARDED 0
#define pcibios_assign_all_busses() pci_assign_all_buses
extern int pci_assign_all_buses;
static inline void pcibios_set_master(struct pci_dev *dev) static inline void pcibios_set_master(struct pci_dev *dev)
{ {
/* No special bus mastering setup handling */
} }
#define PCIBIOS_MIN_MEM 0 #define PCIBIOS_MIN_MEM 0
#define PCIBIOS_MIN_IO TILE_PCIE_LOWER_IO #define PCIBIOS_MIN_IO 0
/* /*
* This flag tells if the platform is TILEmpower that needs * This flag tells if the platform is TILEmpower that needs
* special configuration for the PLX switch chip. * special configuration for the PLX switch chip.
*/ */
extern int blade_pci; extern int tile_plx_gen1;
/* Use any cpu for PCI. */
#define cpumask_of_pcibus(bus) cpu_online_mask
/* implement the pci_ DMA API in terms of the generic device dma_ one */ /* implement the pci_ DMA API in terms of the generic device dma_ one */
#include <asm-generic/pci-dma-compat.h> #include <asm-generic/pci-dma-compat.h>
...@@ -122,7 +100,4 @@ extern int blade_pci; ...@@ -122,7 +100,4 @@ extern int blade_pci;
/* generic pci stuff */ /* generic pci stuff */
#include <asm-generic/pci.h> #include <asm-generic/pci.h>
/* Use any cpu for PCI. */
#define cpumask_of_pcibus(bus) cpu_online_mask
#endif /* _ASM_TILE_PCI_H */ #endif /* _ASM_TILE_PCI_H */
...@@ -292,8 +292,18 @@ extern int kstack_hash; ...@@ -292,8 +292,18 @@ extern int kstack_hash;
/* Are we using huge pages in the TLB for kernel data? */ /* Are we using huge pages in the TLB for kernel data? */
extern int kdata_huge; extern int kdata_huge;
/* Support standard Linux prefetching. */
#define ARCH_HAS_PREFETCH
#define prefetch(x) __builtin_prefetch(x)
#define PREFETCH_STRIDE CHIP_L2_LINE_SIZE() #define PREFETCH_STRIDE CHIP_L2_LINE_SIZE()
/* Bring a value into the L1D, faulting the TLB if necessary. */
#ifdef __tilegx__
#define prefetch_L1(x) __insn_prefetch_l1_fault((void *)(x))
#else
#define prefetch_L1(x) __insn_prefetch_L1((void *)(x))
#endif
#else /* __ASSEMBLY__ */ #else /* __ASSEMBLY__ */
/* Do some slow action (e.g. read a slow SPR). */ /* Do some slow action (e.g. read a slow SPR). */
......
/*
* Copyright 2010 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
/**
* @file drivers/xgbe/impl.h
* Implementation details for the NetIO library.
*/
#ifndef __DRV_XGBE_IMPL_H__
#define __DRV_XGBE_IMPL_H__
#include <hv/netio_errors.h>
#include <hv/netio_intf.h>
#include <hv/drv_xgbe_intf.h>
/** How many groups we have (log2). */
#define LOG2_NUM_GROUPS (12)
/** How many groups we have. */
#define NUM_GROUPS (1 << LOG2_NUM_GROUPS)
/** Number of output requests we'll buffer per tile. */
#define EPP_REQS_PER_TILE (32)
/** Words used in an eDMA command without checksum acceleration. */
#define EDMA_WDS_NO_CSUM 8
/** Words used in an eDMA command with checksum acceleration. */
#define EDMA_WDS_CSUM 10
/** Total available words in the eDMA command FIFO. */
#define EDMA_WDS_TOTAL 128
/*
* FIXME: These definitions are internal and should have underscores!
* NOTE: The actual numeric values here are intentional and allow us to
* optimize the concept "if small ... else if large ... else ...", by
* checking for the low bit being set, and then for non-zero.
* These are used as array indices, so they must have the values (0, 1, 2)
* in some order.
*/
#define SIZE_SMALL (1) /**< Small packet queue. */
#define SIZE_LARGE (2) /**< Large packet queue. */
#define SIZE_JUMBO (0) /**< Jumbo packet queue. */
/** The number of "SIZE_xxx" values. */
#define NETIO_NUM_SIZES 3
/*
* Default numbers of packets for IPP drivers. These values are chosen
* such that CIPP1 will not overflow its L2 cache.
*/
/** The default number of small packets. */
#define NETIO_DEFAULT_SMALL_PACKETS 2750
/** The default number of large packets. */
#define NETIO_DEFAULT_LARGE_PACKETS 2500
/** The default number of jumbo packets. */
#define NETIO_DEFAULT_JUMBO_PACKETS 250
/** Log2 of the size of a memory arena. */
#define NETIO_ARENA_SHIFT 24 /* 16 MB */
/** Size of a memory arena. */
#define NETIO_ARENA_SIZE (1 << NETIO_ARENA_SHIFT)
/** A queue of packets.
*
* This structure partially defines a queue of packets waiting to be
* processed. The queue as a whole is written to by an interrupt handler and
* read by non-interrupt code; this data structure is what's touched by the
* interrupt handler. The other part of the queue state, the read offset, is
* kept in user space, not in hypervisor space, so it is in a separate data
* structure.
*
* The read offset (__packet_receive_read in the user part of the queue
* structure) points to the next packet to be read. When the read offset is
* equal to the write offset, the queue is empty; therefore the queue must
* contain one more slot than the required maximum queue size.
*
* Here's an example of all 3 state variables and what they mean. All
* pointers move left to right.
*
* @code
*   I   I   V   V   V   V   I   I   I   I
*   0   1   2   3   4   5   6   7   8   9  10
*           ^               ^               ^
*           |               |               |
*           |               |               __last_packet_plus_one
*           |               __packet_write
*           __packet_receive_read
* @endcode
*
* This queue has 10 slots, and thus can hold 9 packets (__last_packet_plus_one
* = 10). The read pointer is at 2, and the write pointer is at 6; thus,
* there are valid, unread packets in slots 2, 3, 4, and 5. The remaining
* slots are invalid (do not contain a packet).
*/
typedef struct {
/*
* Only the writer-side state is kept here; the matching read offset is
* the __packet_receive_read field of netio_queue_user_impl_t.
*/
/** Byte offset of the next notify packet to be written: zero for the first
* packet on the queue, sizeof (netio_pkt_t) for the second packet on the
* queue, etc. */
volatile uint32_t __packet_write;
/** Offset of the packet after the last valid packet (i.e., when any
* pointer is incremented to this value, it wraps back to zero). */
uint32_t __last_packet_plus_one;
}
__netio_packet_queue_t;
/** A queue of buffers.
*
* This structure partially defines a queue of empty buffers which have been
* obtained via requests to the IPP. (The elements of the queue are packet
* handles, which are transformed into a full netio_pkt_t when the buffer is
* retrieved.) The queue as a whole is written to by an interrupt handler and
* read by non-interrupt code; this data structure is what's touched by the
* interrupt handler. The other parts of the queue state, the read offset and
* requested write offset, are kept in user space, not in hypervisor space, so
* they are in a separate data structure.
*
* The read offset (__buffer_read in the user part of the queue structure)
* points to the next buffer to be read. When the read offset is equal to the
* write offset, the queue is empty; therefore the queue must contain one more
* slot than the required maximum queue size.
*
* The requested write offset (__buffer_requested_write in the user part of
* the queue structure) points to the slot which will hold the next buffer we
* request from the IPP, once we get around to sending such a request. When
* the requested write offset is equal to the write offset, no requests for
* new buffers are outstanding; when the requested write offset is one greater
* than the read offset, no more requests may be sent.
*
* Note that, unlike the packet_queue, the buffer_queue places incoming
* buffers at decreasing addresses. This makes the check for "is it time to
* wrap the buffer pointer" cheaper in the assembly code which receives new
* buffers, and means that the value which defines the queue size,
* __last_buffer, is different than in the packet queue. Also, the offset
* used in the packet_queue is already scaled by the size of a packet; here we
* use unscaled slot indices for the offsets. (These differences are
* historical, and in the future it's possible that the packet_queue will look
* more like this queue.)
*
* @code
* Here's an example of all 4 state variables and what they mean. Remember:
* all pointers move right to left.
*
*   V   V   V   I   I   R   R   V   V   V
*   0   1   2   3   4   5   6   7   8   9
*           ^       ^       ^           ^
*           |       |       |           |
*           |       |       |           __last_buffer
*           |       |       __buffer_write
*           |       __buffer_requested_write
*           __buffer_read
* @endcode
*
* This queue has 10 slots, and thus can hold 9 buffers (__last_buffer = 9).
* The read pointer is at 2, and the write pointer is at 6; thus, there are
* valid, unread buffers in slots 2, 1, 0, 9, 8, and 7. The requested write
* pointer is at 4; thus, requests have been made to the IPP for buffers which
* will be placed in slots 6 and 5 when they arrive. Finally, the remaining
* slots are invalid (do not contain a buffer).
*/
typedef struct
{
/*
* Only the writer-side state is kept here; the read and requested-write
* indices are the __buffer_read[] and __buffer_requested_write[] fields
* of netio_queue_user_impl_t.
* NOTE(review): those user-side indices are uint8_t while the fields
* below are uint32_t -- presumably slot counts never exceed 255; confirm.
*/
/** Ordinal number of the next buffer to be written: 0 for the first slot in
* the queue, 1 for the second slot in the queue, etc. */
volatile uint32_t __buffer_write;
/** Ordinal number of the last buffer (i.e., when any pointer is decremented
* below zero, it is reloaded with this value). */
uint32_t __last_buffer;
}
__netio_buffer_queue_t;
/**
* An object for providing Ethernet packets to a process.
*/
typedef struct __netio_queue_impl_t
{
/** The queue of packets waiting to be received. */
__netio_packet_queue_t __packet_receive_queue;
/** The intr bit mask that IDs this device. */
unsigned int __intr_id;
/** Offset to queues of empty buffers, one per size. */
/* NOTE(review): presumably indexed by SIZE_SMALL/SIZE_LARGE/SIZE_JUMBO
* (documented as array indices with values 0, 1, 2) -- confirm. */
uint32_t __buffer_queue[NETIO_NUM_SIZES];
/** The address of the first EPP tile, or -1 if no EPP. */
/* ISSUE: Actually this is always "0" or "~0". */
uint32_t __epp_location;
/** The queue ID that this queue represents. */
unsigned int __queue_id;
/** Number of acknowledgements received. */
volatile uint32_t __acks_received;
/** Last completion number received for packet_sendv. */
volatile uint32_t __last_completion_rcv;
/** Number of packets allowed to be outstanding. */
uint32_t __max_outstanding;
/** First VA available for packets. */
void* __va_0;
/** First VA in second range available for packets. */
void* __va_1;
/** Padding to align the "__packets" field to the size of a netio_pkt_t. */
/* NOTE(review): the two fields above are pointers, so the padding needed
* depends on pointer width -- confirm three words is right for every
* supported build. */
uint32_t __padding[3];
/** The packets themselves. */
/* Zero-length trailing array: the packet storage follows the header. */
netio_pkt_t __packets[0];
}
netio_queue_impl_t;
/**
* An object for managing the user end of a NetIO queue.
*/
typedef struct __netio_queue_user_impl_t
{
/** The next incoming packet to be read. */
/* Reader-side counterpart of __netio_packet_queue_t.__packet_write. */
uint32_t __packet_receive_read;
/** The next empty buffers to be read, one index per size. */
uint8_t __buffer_read[NETIO_NUM_SIZES];
/** Where the empty buffer we next request from the IPP will go, one index
* per size. */
uint8_t __buffer_requested_write[NETIO_NUM_SIZES];
/** PCIe interface flag. */
uint8_t __pcie;
/** Number of packets left to be received before we send a credit update. */
uint32_t __receive_credit_remaining;
/** Value placed in __receive_credit_remaining when it reaches zero. */
uint32_t __receive_credit_interval;
/** First fast I/O routine index. */
/* NOTE(review): presumably the base to which the NETIO_FASTIO_xxx
* offsets are added -- confirm. */
uint32_t __fastio_index;
/** Number of acknowledgements expected. */
uint32_t __acks_outstanding;
/** Last completion number requested. */
uint32_t __last_completion_req;
/** File descriptor for driver. */
int __fd;
}
netio_queue_user_impl_t;
#define NETIO_GROUP_CHUNK_SIZE 64 /**< Max # groups in one IPP request */
#define NETIO_BUCKET_CHUNK_SIZE 64 /**< Max # buckets in one IPP request */
/** Internal structure used to convey packet send information to the
* hypervisor. FIXME: Actually, it's not used for that anymore, but
* netio_packet_send() still uses it internally.
*/
typedef struct
{
uint16_t flags; /**< Packet flags (__NETIO_SEND_FLG_xxx) */
/* NOTE(review): 16-bit field, so presumably packets never exceed 65535 bytes -- confirm. */
uint16_t transfer_size; /**< Size of packet */
/* The VA is carried as a 32-bit integer, not as a pointer. */
uint32_t va; /**< VA of start of packet */
__netio_pkt_handle_t handle; /**< Packet handle */
uint32_t csum0; /**< First checksum word */
uint32_t csum1; /**< Second checksum word */
}
__netio_send_cmd_t;
/** Flags used in two contexts:
* - As the "flags" member in the __netio_send_cmd_t, above; used only
* for netio_pkt_send_{prepare,commit}.
* - As part of the flags passed to the various send packet fast I/O calls.
*/
/** Need acknowledgement on this packet. Note that some code in the
* normal send_pkt fast I/O handler assumes that this is equal to 1. */
#define __NETIO_SEND_FLG_ACK 0x1
/** Do checksum on this packet. (Only used with the __netio_send_cmd_t;
* normal packet sends use a special fast I/O index to denote checksumming,
* and multi-segment sends test the checksum descriptor.) */
#define __NETIO_SEND_FLG_CSUM 0x2
/** Get a completion on this packet. Only used with multi-segment sends. */
#define __NETIO_SEND_FLG_COMPLETION 0x4
/** Position of the number-of-extra-segments value in the flags word.
Only used with multi-segment sends. */
#define __NETIO_SEND_FLG_XSEG_SHIFT 3
/** Width of the number-of-extra-segments value in the flags word. */
#define __NETIO_SEND_FLG_XSEG_WIDTH 2
#endif /* __DRV_XGBE_IMPL_H__ */
/*
* Copyright 2010 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
/**
* @file drv_xgbe_intf.h
* Interface to the hypervisor XGBE driver.
*/
#ifndef __DRV_XGBE_INTF_H__
#define __DRV_XGBE_INTF_H__
/**
* An object for forwarding VAs and PAs to the hypervisor.
* @ingroup types
*
* This allows the supervisor to specify a number of areas of memory to
* store packet buffers.
*/
typedef struct
{
/** The physical address of the memory. */
HV_PhysAddr pa;
/** Page table entry for the memory. This is only used to derive the
* memory's caching mode; the PA bits are ignored. */
HV_PTE pte;
/** The virtual address of the memory. */
HV_VirtAddr va;
/** Size (in bytes) of the memory area. */
/* NOTE(review): signed int, so presumably areas are below 2 GB -- confirm. */
int size;
}
netio_ipp_address_t;
/** The various pread/pwrite offsets into the hypervisor-level driver.
* @ingroup types
*/
typedef enum
{
/*
* Every enumerator below has its opcode in the top 8 bits of a 64-bit
* value; NETIO_BASE_OFFSET() and NETIO_LOCAL_OFFSET() split an offset
* along that boundary.
* NOTE(review): these values do not fit in an int, which ISO C (before
* C23) requires of enumeration constants, so this relies on a compiler
* extension -- confirm the toolchain accepts it.
*/
/** Inform the Linux driver of the address of the NetIO arena memory.
* This offset is actually only used to convey information from netio
* to the Linux driver; it never makes it from there to the hypervisor.
* Write-only; takes a uint32_t specifying the VA address. */
NETIO_FIXED_ADDR = 0x5000000000000000ULL,
/** Inform the Linux driver of the size of the NetIO arena memory.
* This offset is actually only used to convey information from netio
* to the Linux driver; it never makes it from there to the hypervisor.
* Write-only; takes a uint32_t specifying the VA size. */
NETIO_FIXED_SIZE = 0x5100000000000000ULL,
/** Register current tile with IPP. Write then read: write, takes a
* netio_input_config_t, read returns a pointer to a netio_queue_impl_t. */
NETIO_IPP_INPUT_REGISTER_OFF = 0x6000000000000000ULL,
/** Unregister current tile from IPP. Write-only, takes a dummy argument. */
NETIO_IPP_INPUT_UNREGISTER_OFF = 0x6100000000000000ULL,
/** Start packets flowing. Write-only, takes a dummy argument. */
NETIO_IPP_INPUT_INIT_OFF = 0x6200000000000000ULL,
/** Stop packets flowing. Write-only, takes a dummy argument. */
NETIO_IPP_INPUT_UNINIT_OFF = 0x6300000000000000ULL,
/** Configure group (typically we group on VLAN). Write-only: takes an
* array of netio_group_t's, low 24 bits of the offset is the base group
* number times the size of a netio_group_t. */
NETIO_IPP_INPUT_GROUP_CFG_OFF = 0x6400000000000000ULL,
/** Configure bucket. Write-only: takes an array of netio_bucket_t's, low
* 24 bits of the offset is the base bucket number times the size of a
* netio_bucket_t. */
NETIO_IPP_INPUT_BUCKET_CFG_OFF = 0x6500000000000000ULL,
/** Get/set a parameter. Read or write: read or write data is the parameter
* value, low 32 bits of the offset is a __netio_getset_offset_t. */
NETIO_IPP_PARAM_OFF = 0x6600000000000000ULL,
/** Get fast I/O index. Read-only; returns a 4-byte base index value. */
NETIO_IPP_GET_FASTIO_OFF = 0x6700000000000000ULL,
/** Configure hijack IP address. Packets with this IPv4 dest address
* go to bucket NETIO_NUM_BUCKETS - 1. Write-only: takes an IP address
* in some standard form. FIXME: Define the form! */
NETIO_IPP_INPUT_HIJACK_CFG_OFF = 0x6800000000000000ULL,
/**
* Offsets beyond this point are reserved for the supervisor (although that
* enforcement must be done by the supervisor driver itself).
*/
NETIO_IPP_USER_MAX_OFF = 0x6FFFFFFFFFFFFFFFULL,
/** Register I/O memory. Write-only, takes a netio_ipp_address_t. */
NETIO_IPP_IOMEM_REGISTER_OFF = 0x7000000000000000ULL,
/** Unregister I/O memory. Write-only, takes a netio_ipp_address_t. */
NETIO_IPP_IOMEM_UNREGISTER_OFF = 0x7100000000000000ULL,
/* Offsets greater than 0x7FFFFFFF can't be used directly from Linux
* userspace code due to limitations in the pread/pwrite syscalls.
* NOTE(review): the enumerators above this comment already exceed
* 0x7FFFFFFF and the ones below all have the top bit set, so the
* intended limit is presumably 0x7FFFFFFFFFFFFFFF -- confirm. */
/** Drain LIPP buffers. */
NETIO_IPP_DRAIN_OFF = 0xFA00000000000000ULL,
/** Supply a netio_ipp_address_t to be used as shared memory for the
* LEPP command queue. */
NETIO_EPP_SHM_OFF = 0xFB00000000000000ULL,
/* 0xFC... is currently unused. */
/** Stop IPP/EPP tiles. Write-only, takes a dummy argument. */
NETIO_IPP_STOP_SHIM_OFF = 0xFD00000000000000ULL,
/** Start IPP/EPP tiles. Write-only, takes a dummy argument. */
NETIO_IPP_START_SHIM_OFF = 0xFE00000000000000ULL,
/** Supply packet arena. Write-only, takes an array of
* netio_ipp_address_t values. */
NETIO_IPP_ADDRESS_OFF = 0xFF00000000000000ULL,
} netio_hv_offset_t;
/** Keep only the top 8 bits (the base offset) of an offset */
#define NETIO_BASE_OFFSET(off) ((off) & (0xFFULL << 56))
/** Keep only the low 56 bits (the local offset) of an offset */
#define NETIO_LOCAL_OFFSET(off) ((off) & ~(0xFFULL << 56))
/**
* Get/set offset.
*/
typedef union
{
/*
* The three bit-fields total 48 + 8 + 8 = 64 bits, the width of "word".
* NOTE(review): bit-field placement is implementation-defined; this
* presumably relies on addr landing in the low bits and opcode in the
* high bits on the target ABI -- confirm.
*/
struct
{
uint64_t addr:48; /**< Class-specific address */
unsigned int class:8; /**< Class (e.g., NETIO_PARAM) */
unsigned int opcode:8; /**< High 8 bits of NETIO_IPP_PARAM_OFF */
}
bits; /**< Bitfields */
uint64_t word; /**< Aggregated value to use as the offset */
}
__netio_getset_offset_t;
/**
* Fast I/O index offsets (must be contiguous).
*/
typedef enum
{
	/** Get empty packet buffer */
	NETIO_FASTIO_ALLOCATE = 0,

	/** Give buffer back to IPP */
	NETIO_FASTIO_FREE_BUFFER = 1,

	/** Give credits to IPP */
	NETIO_FASTIO_RETURN_CREDITS = 2,

	/** Send a packet, no checksum */
	NETIO_FASTIO_SEND_PKT_NOCK = 3,

	/** Send a packet, with checksum */
	NETIO_FASTIO_SEND_PKT_CK = 4,

	/** Send a vector of packets */
	NETIO_FASTIO_SEND_PKT_VEC = 5,

	/** Sendv one packet */
	NETIO_FASTIO_SENDV_PKT = 6,

	/** Total number of fast I/O indices */
	NETIO_FASTIO_NUM_INDEX = 7,
} netio_fastio_index_t;
/** 3-word return type for Fast I/O call. */
typedef struct
{
/* NOTE(review): presumably one of the netio error codes -- confirm. */
int err; /**< Error code. */
uint32_t val0; /**< Value. Meaning depends upon the specific call. */
uint32_t val1; /**< Value. Meaning depends upon the specific call. */
} netio_fastio_rv3_t;
/** 0-argument fast I/O call */
int __netio_fastio0(uint32_t fastio_index);
/** 1-argument fast I/O call */
int __netio_fastio1(uint32_t fastio_index, uint32_t arg0);
/** 3-argument fast I/O call, 3-word return value */
netio_fastio_rv3_t __netio_fastio3_rv3(uint32_t fastio_index, uint32_t arg0,
uint32_t arg1, uint32_t arg2);
/** 4-argument fast I/O call */
int __netio_fastio4(uint32_t fastio_index, uint32_t arg0, uint32_t arg1,
uint32_t arg2, uint32_t arg3);
/** 6-argument fast I/O call */
int __netio_fastio6(uint32_t fastio_index, uint32_t arg0, uint32_t arg1,
uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5);
/** 9-argument fast I/O call */
int __netio_fastio9(uint32_t fastio_index, uint32_t arg0, uint32_t arg1,
uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5,
uint32_t arg6, uint32_t arg7, uint32_t arg8);
/**
 * Request an empty packet through the one-argument fast I/O call.
 * @param fastio_index Base fast I/O index; NETIO_FASTIO_ALLOCATE is added.
 * @param size Size of the packet to allocate.
 */
#define __netio_fastio_allocate(fastio_index, size) \
	__netio_fastio1(NETIO_FASTIO_ALLOCATE + (fastio_index), (size))
/**
 * Hand a buffer back through the one-argument fast I/O call.
 * @param fastio_index Base fast I/O index; NETIO_FASTIO_FREE_BUFFER is added.
 * @param handle Handle for the packet to free.
 */
#define __netio_fastio_free_buffer(fastio_index, handle) \
	__netio_fastio1(NETIO_FASTIO_FREE_BUFFER + (fastio_index), (handle))
/**
 * Add receive credits through the one-argument fast I/O call.
 * @param fastio_index Base fast I/O index; NETIO_FASTIO_RETURN_CREDITS is added.
 * @param credits Number of credits to add.
 */
#define __netio_fastio_return_credits(fastio_index, credits) \
	__netio_fastio1(NETIO_FASTIO_RETURN_CREDITS + (fastio_index), (credits))
/**
 * Send a packet without checksumming, via the four-argument fast I/O call.
 * @param fastio_index Base fast I/O index; NETIO_FASTIO_SEND_PKT_NOCK is added.
 * @param ackflag Nonzero if we want an ack.
 * @param size Size of the packet.
 * @param va Virtual address of start of packet.
 * @param handle Packet handle.
 */
#define __netio_fastio_send_pkt_nock(fastio_index, ackflag, size, va, handle) \
	__netio_fastio4(NETIO_FASTIO_SEND_PKT_NOCK + (fastio_index), \
			(ackflag), (size), (va), (handle))
/**
 * Send a packet and have its checksum calculated.
 *
 * @param fastio_index Fast I/O index.
 * @param ackflag Nonzero if we want an ack.
 * @param size Size of the packet.
 * @param va Virtual address of start of packet.
 * @param handle Packet handle.
 * @param csum0 Shim checksum header.
 * @param csum1 Checksum seed.
 */
#define __netio_fastio_send_pkt_ck(fastio_index, ackflag, size, va, handle, \
				   csum0, csum1) \
	__netio_fastio6((fastio_index) + NETIO_FASTIO_SEND_PKT_CK, \
			(ackflag), (size), (va), (handle), \
			(csum0), (csum1))
/** Format for the "csum0" argument to the __netio_fastio_send routines
 * and LEPP. Note that this is currently exactly identical to the
 * ShimProtocolOffloadHeader.
 *
 * The four bit-fields add up to 32 bits (7 + 14 + 7 + 4) and are overlaid
 * on the single 'word' member, so the header can be built field by field
 * and then handed over as one integer.
 *
 * NOTE(review): which end of 'word' start_byte occupies is decided by the
 * compiler's bit-field allocation order -- confirm before reusing this
 * layout with a different toolchain or target.
 */
typedef union
{
struct
{
unsigned int start_byte:7; /**< The first byte to be checksummed */
unsigned int count:14; /**< Number of bytes to be checksummed. */
unsigned int destination_byte:7; /**< The byte to write the checksum to. */
unsigned int reserved:4; /**< Reserved. */
} bits; /**< Decomposed method of access. */
unsigned int word; /**< To send out the IDN. */
} __netio_checksum_header_t;
/**
 * Sendv a packet made of 1 or 2 segments.
 *
 * @param fastio_index Fast I/O index.
 * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus
 *        1 in next 2 bits; expected checksum in high 16 bits.
 * @param confno Confirmation number to request, if notify flag set.
 * @param csum0 Checksum descriptor; if zero, no checksum.
 * @param va_F Virtual address of first segment.
 * @param va_L Virtual address of last segment, if 2 segments.
 * @param len_F_L Length of first segment in low 16 bits; length of last
 *        segment, if 2 segments, in high 16 bits.
 */
#define __netio_fastio_sendv_pkt_1_2(fastio_index, flags, confno, csum0, \
				     va_F, va_L, len_F_L) \
	__netio_fastio6((fastio_index) + NETIO_FASTIO_SENDV_PKT, \
			(flags), (confno), (csum0), \
			(va_F), (va_L), (len_F_L))
/**
 * Send a packet on the PCIe interface.
 *
 * @param fastio_index Fast I/O index.
 * @param flags Ack/csum/notify flags in low 3 bits.
 * @param confno Confirmation number to request, if notify flag set.
 * @param csum0 Checksum descriptor; Hard wired 0, not needed for PCIe.
 * @param va_F Virtual address of the packet buffer.
 * @param va_L Virtual address of last segment, if 2 segments. Hard wired 0.
 * @param len_F_L Length of the packet buffer in low 16 bits.
 */
#define __netio_fastio_send_pcie_pkt(fastio_index, flags, confno, csum0, \
				     va_F, va_L, len_F_L) \
	__netio_fastio6((fastio_index) + PCIE_FASTIO_SENDV_PKT, \
			(flags), (confno), (csum0), \
			(va_F), (va_L), (len_F_L))
/**
 * Sendv a packet made of 3 or 4 segments.
 *
 * @param fastio_index Fast I/O index.
 * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus
 *        1 in next 2 bits; expected checksum in high 16 bits.
 * @param confno Confirmation number to request, if notify flag set.
 * @param csum0 Checksum descriptor; if zero, no checksum.
 * @param va_F Virtual address of first segment.
 * @param va_L Virtual address of last segment (third segment if 3 segments,
 *        fourth segment if 4 segments).
 * @param len_F_L Length of first segment in low 16 bits; length of last
 *        segment in high 16 bits.
 * @param va_M0 Virtual address of "middle 0" segment; this segment is sent
 *        second when there are three segments, and third if there are four.
 * @param va_M1 Virtual address of "middle 1" segment; this segment is sent
 *        second when there are four segments.
 * @param len_M0_M1 Length of middle 0 segment in low 16 bits; length of
 *        middle 1 segment, if 4 segments, in high 16 bits.
 */
#define __netio_fastio_sendv_pkt_3_4(fastio_index, flags, confno, csum0, \
				     va_F, va_L, len_F_L, \
				     va_M0, va_M1, len_M0_M1) \
	__netio_fastio9((fastio_index) + NETIO_FASTIO_SENDV_PKT, \
			(flags), (confno), (csum0), \
			(va_F), (va_L), (len_F_L), \
			(va_M0), (va_M1), (len_M0_M1))
/**
 * Send a vector of packets.
 *
 * @param fastio_index Fast I/O index.
 * @param seqno Number of packets transmitted so far on this interface;
 *        used to decide which packets should be acknowledged.
 * @param nentries Number of entries in vector.
 * @param va Virtual address of start of vector entry array.
 * @return 3-word netio_fastio_rv3_t structure.  The structure's err member
 *         is an error code, or zero if no error.  The val0 member is the
 *         updated value of seqno; it has been incremented by 1 for each
 *         packet sent.  That increment may be less than nentries if an
 *         error occurred, or if some of the entries in the vector contain
 *         handles equal to NETIO_PKT_HANDLE_NONE.  The val1 member is the
 *         updated value of nentries; it has been decremented by 1 for each
 *         vector entry processed.  Again, that decrement may be less than
 *         nentries (leaving the returned value positive) if an error
 *         occurred.
 */
#define __netio_fastio_send_pkt_vec(fastio_index, seqno, nentries, va) \
	__netio_fastio3_rv3((fastio_index) + NETIO_FASTIO_SEND_PKT_VEC, \
			    (seqno), (nentries), (va))
/** An egress DMA command for LEPP.
 *
 * The first byte packs eight single-bit/short flags (1 + 3 + 1 + 1 + 1 + 1
 * bits); it is followed by the high CPA byte, a 16-bit length, the low
 * 32 CPA bits and the checksum descriptor.
 */
typedef struct
{
/** Is this a TSO transfer?
 *
 * NOTE: This field is always 0, to distinguish it from
 * lepp_tso_cmd_t. It must come first!
 */
uint8_t tso : 1;
/** Unused padding bits. */
uint8_t _unused : 3;
/** Should this packet be sent directly from caches instead of DRAM,
 * using hash-for-home to locate the packet data?
 */
uint8_t hash_for_home : 1;
/** Should we compute a checksum? */
uint8_t compute_checksum : 1;
/** Is this the final buffer for this packet?
 *
 * A single packet can be split over several input buffers (a "gather"
 * operation). This flag indicates that this is the last buffer
 * in a packet.
 */
uint8_t end_of_packet : 1;
/** Should LEPP advance 'comp_busy' when this DMA is fully finished? */
uint8_t send_completion : 1;
/** High bits of Client Physical Address of the start of the buffer
 * to be egressed.
 *
 * NOTE: Only 6 bits are actually needed here, as CPAs are
 * currently 38 bits (32 of which live in cpa_lo). So two bits could be
 * scavenged from this.
 */
uint8_t cpa_hi;
/** The number of bytes to be egressed. */
uint16_t length;
/** Low 32 bits of Client Physical Address of the start of the buffer
 * to be egressed.
 */
uint32_t cpa_lo;
/** Checksum information (only used if 'compute_checksum'). */
__netio_checksum_header_t checksum_data;
} lepp_cmd_t;
/** A chunk of physical memory for a TSO egress.
 *
 * An array of these follows the fixed part of an lepp_tso_cmd_t (its
 * 'frags' member), one entry per fragment.
 */
typedef struct
{
/** The low bits of the CPA. */
uint32_t cpa_lo;
/** The high bits of the CPA (15-bit field sharing a 16-bit unit with
 * hash_for_home).
 */
uint16_t cpa_hi : 15;
/** Should this packet be sent directly from caches instead of DRAM,
 * using hash-for-home to locate the packet data?
 */
uint16_t hash_for_home : 1;
/** The length in bytes. */
uint16_t length;
} lepp_frag_t;
/** An LEPP command that handles TSO.
 *
 * Variable-sized: the fixed fields below are followed by num_frags
 * lepp_frag_t entries and then by the packet header template.  Use
 * LEPP_TSO_CMD_SIZE() to compute the total footprint.
 */
typedef struct
{
/** Is this a TSO transfer?
 *
 * NOTE: This field is always 1, to distinguish it from
 * lepp_cmd_t. It must come first!
 */
uint8_t tso : 1;
/** Unused padding bits. */
uint8_t _unused : 7;
/** Size of the header[] array in bytes. It must be in the range
 * [40, 127], which are the smallest header for a TCP packet over
 * Ethernet and the maximum possible prepend size supported by
 * hardware, respectively. Note that the array storage must be
 * padded out to a multiple of four bytes so that the following
 * LEPP command is aligned properly.
 */
uint8_t header_size;
/** Byte offset of the IP header in header[]. */
uint8_t ip_offset;
/** Byte offset of the TCP header in header[]. */
uint8_t tcp_offset;
/** The number of bytes to use for the payload of each packet,
 * except of course the last one, which may not have enough bytes.
 * This means that each Ethernet packet except the last will have a
 * size of header_size + payload_size.
 */
uint16_t payload_size;
/** The length of the 'frags' array that follows this struct. */
uint16_t num_frags;
/** The actual frags (zero-length array: contributes nothing to
 * sizeof(lepp_tso_cmd_t)).
 */
lepp_frag_t frags[0 /* Variable-sized; num_frags entries. */];
/*
 * The packet header template logically follows frags[],
 * but you can't declare that in C.
 *
 * uint32_t header[header_size_in_words_rounded_up];
 */
} lepp_tso_cmd_t;
/** One slot of the LEPP completion ring: an opaque pointer. */
typedef void *lepp_comp_t;
/**
 * Upper bound on the fragments in a single TSO command.  Modelled on
 * linux's "MAX_SKB_FRAGS", which presumably over-estimates by one when the
 * page size is exactly 65536; one more is added for a "body" fragment.
 */
#define LEPP_MAX_FRAGS ((65536 / HV_PAGE_SIZE_SMALL) + 2 + 1)

/**
 * Total number of bytes needed for an lepp_tso_cmd_t: the fixed part, the
 * fragment descriptors, and the header rounded up to a multiple of four
 * bytes.
 */
#define LEPP_TSO_CMD_SIZE(num_frags, header_size) \
	(sizeof(lepp_tso_cmd_t) + \
	 (num_frags) * sizeof(lepp_frag_t) + \
	 (((header_size) + 3) & -4))

/** The size of the lepp "cmd" queue. */
#define LEPP_CMD_QUEUE_BYTES \
	(((CHIP_L2_CACHE_SIZE() - 2 * CHIP_L2_LINE_SIZE()) / \
	  (sizeof(lepp_cmd_t) + sizeof(lepp_comp_t))) * \
	 sizeof(lepp_cmd_t))

/** The largest possible command that can go in lepp_queue_t::cmds[]. */
#define LEPP_MAX_CMD_SIZE LEPP_TSO_CMD_SIZE(LEPP_MAX_FRAGS, 128)

/**
 * The largest possible value of lepp_queue_t::cmd_{head, tail}
 * (inclusive).
 */
#define LEPP_CMD_LIMIT (LEPP_CMD_QUEUE_BYTES - LEPP_MAX_CMD_SIZE)

/** The maximum number of completions in an LEPP queue. */
#define LEPP_COMP_QUEUE_SIZE \
	((LEPP_CMD_LIMIT + sizeof(lepp_cmd_t) - 1) / sizeof(lepp_cmd_t))
/**
 * Advance a completion-ring index by one, modulo the queue size.
 *
 * 'var' must be an lvalue; it is evaluated more than once.
 * NOTE(review): relies on __insn_mnz(a, b) yielding b when a is nonzero
 * and 0 otherwise, so the index wraps to 0 after reaching
 * LEPP_COMP_QUEUE_SIZE - 1 -- confirm against the Tile intrinsic docs.
 */
#define LEPP_QINC(var) \
	((var) = __insn_mnz((var) - (LEPP_COMP_QUEUE_SIZE - 1), (var) + 1))
/** A queue used to convey egress commands from the client to LEPP.
 *
 * Holds two rings: a byte-indexed ring of variable-sized commands
 * (cmds[], tracked by cmd_head/cmd_tail) and a slot-indexed ring of user
 * completion data (comps[], tracked by comp_head/comp_busy/comp_tail).
 */
typedef struct
{
/** Index of first completion not yet processed by user code.
 * If this is equal to comp_busy, there are no such completions.
 *
 * NOTE: This is only read/written by the user.
 */
unsigned int comp_head;
/** Index of first completion record not yet completed.
 * If this is equal to comp_tail, there are no such completions.
 * This index gets advanced (modulo LEPP_COMP_QUEUE_SIZE) whenever
 * a command with the 'send_completion' bit set is finished.
 *
 * NOTE: This is only written by LEPP, only read by the user.
 */
volatile unsigned int comp_busy;
/** Index of the first empty slot in the completion ring.
 * Entries from this up to but not including comp_head (in ring order)
 * can be filled in with completion data.
 *
 * NOTE: This is only read/written by the user.
 */
unsigned int comp_tail;
/** Byte index of first command enqueued for LEPP but not yet processed.
 *
 * This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT.
 *
 * NOTE: LEPP advances this counter as soon as it no longer needs
 * the cmds[] storage for this entry, but the transfer is not actually
 * complete (i.e. the buffer pointed to by the command is no longer
 * needed) until comp_busy advances.
 *
 * If this is equal to cmd_tail, the ring is empty.
 *
 * NOTE: This is only written by LEPP, only read by the user.
 */
volatile unsigned int cmd_head;
/** Byte index of first empty slot in the command ring. This field can
 * be incremented up to but not equal to cmd_head (because that would
 * mean the ring is empty).
 *
 * This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT.
 *
 * NOTE: This is read/written by the user, only read by LEPP.
 */
volatile unsigned int cmd_tail;
/** A ring of variable-sized egress DMA commands, aligned to an L2
 * cache line.
 *
 * NOTE: Only written by the user, only read by LEPP.
 */
char cmds[LEPP_CMD_QUEUE_BYTES]
__attribute__((aligned(CHIP_L2_LINE_SIZE())));
/** A ring of user completion data, aligned to an L2 cache line.
 * NOTE: Only read/written by the user.
 */
lepp_comp_t comps[LEPP_COMP_QUEUE_SIZE]
__attribute__((aligned(CHIP_L2_LINE_SIZE())));
} lepp_queue_t;
/**
 * Internal helper: how many entries of the completion ring are free, given
 * that one entry is always kept back as a sentinel.
 *
 * @param head Index of the oldest live entry.
 * @param tail Index of the first empty slot.
 * @return (head - tail - 1) modulo LEPP_COMP_QUEUE_SIZE.
 */
static inline unsigned int
_lepp_num_free_slots(unsigned int head, unsigned int tail)
{
	/*
	 * The reserved sentinel entry lets "empty" and "full" be told apart.
	 * Rather than pay for a % operation, add the queue size back in only
	 * when the plain difference would have gone negative.
	 */
	unsigned int wrap = 0;

	if (head <= tail)
		wrap = LEPP_COMP_QUEUE_SIZE;

	return (head - tail - 1) + wrap;
}
/**
 * Number of completion entries that can still be enqueued on 'q',
 * derived from its comp_head and comp_tail indices.
 */
static inline unsigned int
lepp_num_free_comp_slots(const lepp_queue_t* q)
{
	const unsigned int head = q->comp_head;
	const unsigned int tail = q->comp_tail;

	return _lepp_num_free_slots(head, tail);
}
/**
 * Subtract two completion-ring indices, wrapping a negative difference
 * back into range by adding LEPP_COMP_QUEUE_SIZE.
 *
 * The wrap is spelled as an explicit comparison rather than a sign-bit
 * shift: right-shifting a negative int is implementation-defined in C,
 * and a literal shift count of 31 bakes in the width of int.
 *
 * @param v1 Minuend index.
 * @param v2 Subtrahend index.
 * @return v1 - v2 if that is non-negative, otherwise
 *         v1 - v2 + LEPP_COMP_QUEUE_SIZE.
 */
static inline int
lepp_qsub(int v1, int v2)
{
	int delta = v1 - v2;

	if (delta < 0)
		delta += (int)LEPP_COMP_QUEUE_SIZE;

	return delta;
}
/** FIXME: Check this from linux, via a new "pwrite()" call. */
#define LIPP_VERSION		1

/** Alignment padding in front of each packet: exactly two bytes. */
#define LIPP_PACKET_PADDING	2

/** The minimum size of a "small" buffer (including the padding). */
#define LIPP_SMALL_PACKET_SIZE	128

/*
 * NOTE: The two buffer counts below should total to less than around
 * 13582, to keep the total size used for "lipp_state_t" below 64K.
 */

/**
 * The maximum number of "small" buffers.
 * This is enough for 53 network cpus with 128 credits.  Note that
 * if these are exhausted, we will fall back to using large buffers.
 */
#define LIPP_SMALL_BUFFERS	6785

/**
 * The maximum number of "large" buffers.
 * This is enough for 53 network cpus with 128 credits.
 */
#define LIPP_LARGE_BUFFERS	6785
#endif /* __DRV_XGBE_INTF_H__ */
/*
* Copyright 2010 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
/**
* Error codes returned from NetIO routines.
*/
#ifndef __NETIO_ERRORS_H__
#define __NETIO_ERRORS_H__
/**
* @addtogroup error
*
* @brief The error codes returned by NetIO functions.
*
* NetIO functions return 0 (defined as ::NETIO_NO_ERROR) on success, and
* a negative value if an error occurs.
*
* In cases where a NetIO function failed due to a error reported by
* system libraries, the error code will be the negation of the
* system errno at the time of failure. The @ref netio_strerror()
* function will deliver error strings for both NetIO and system error
* codes.
*
* @{
*/
/** Every status code a NetIO routine can report. */
typedef enum {
	/** Operation successfully completed. */
	NETIO_NO_ERROR = 0,

	/** A packet was successfully retrieved from an input queue. */
	NETIO_PKT = 0,

	/** Largest NetIO error number. */
	NETIO_ERR_MAX = -701,

	/** The tile is not registered with the IPP. */
	NETIO_NOT_REGISTERED = -701,

	/** No packet was available to retrieve from the input queue. */
	NETIO_NOPKT = -702,

	/** The requested function is not implemented. */
	NETIO_NOT_IMPLEMENTED = -703,

	/**
	 * On a registration operation, the target queue already has the
	 * maximum number of tiles registered for it, and no more may be
	 * added.  On a packet send operation, the output queue is full and
	 * nothing more can be queued until some of the queued packets are
	 * actually transmitted.
	 */
	NETIO_QUEUE_FULL = -704,

	/** The calling process or thread is not bound to exactly one CPU. */
	NETIO_BAD_AFFINITY = -705,

	/** Cannot allocate memory on requested controllers. */
	NETIO_CANNOT_HOME = -706,

	/**
	 * On a registration operation, the IPP specified is not configured
	 * to support the options requested; for instance, the application
	 * wants a specific type of tagged headers which the configured IPP
	 * doesn't support.  Or, the supplied configuration information is
	 * not self-consistent, or is out of range; for instance, specifying
	 * both NETIO_RECV and NETIO_NO_RECV, or asking for more than
	 * NETIO_MAX_SEND_BUFFERS to be preallocated.  On a VLAN or bucket
	 * configure operation, the number of items, or the base item, was
	 * out of range.
	 */
	NETIO_BAD_CONFIG = -707,

	/** Too many tiles have registered to transmit packets. */
	NETIO_TOOMANY_XMIT = -708,

	/**
	 * Packet transmission was attempted on a queue which was registered
	 * with transmit disabled.
	 */
	NETIO_UNREG_XMIT = -709,

	/** This tile is already registered with the IPP. */
	NETIO_ALREADY_REGISTERED = -710,

	/** The Ethernet link is down.  The application should try again later. */
	NETIO_LINK_DOWN = -711,

	/**
	 * An invalid memory buffer has been specified.  This may be an
	 * unmapped virtual address, or one which does not meet alignment
	 * requirements.  For netio_input_register(), this error may be
	 * returned when multiple processes specify different memory regions
	 * to be used for NetIO buffers.  That can happen if these processes
	 * specify explicit memory regions with the ::NETIO_FIXED_BUFFER_VA
	 * flag, or if tmc_cmem_init() has not been called by a common
	 * ancestor of the processes.
	 */
	NETIO_FAULT = -712,

	/** Cannot combine user-managed shared memory and cache coherence. */
	NETIO_BAD_CACHE_CONFIG = -713,

	/** Smallest NetIO error number. */
	NETIO_ERR_MIN = -713,

#ifndef __DOXYGEN__
	/**
	 * Used internally to mean that no response is needed; never
	 * returned to an application.
	 */
	NETIO_NO_RESPONSE = 1
#endif
} netio_error_t;
/** @} */
#endif /* __NETIO_ERRORS_H__ */
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -15,3 +15,4 @@ obj-$(CONFIG_SMP) += smpboot.o smp.o tlb.o ...@@ -15,3 +15,4 @@ obj-$(CONFIG_SMP) += smpboot.o smp.o tlb.o
obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
obj-$(CONFIG_PCI) += pci.o
/*
* Copyright 2010 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/capability.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/bootmem.h>
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/uaccess.h>
#include <asm/processor.h>
#include <asm/sections.h>
#include <asm/byteorder.h>
#include <asm/hv_driver.h>
#include <hv/drv_pcie_rc_intf.h>
/*
* Initialization flow and process
* -------------------------------
*
* This file contains the routines to search for PCI buses,
* enumerate the buses, and configure any attached devices.
*
* There are two entry points here:
* 1) tile_pci_init
* This sets up the pci_controller structs, and opens the
* FDs to the hypervisor. This is called from setup_arch() early
* in the boot process.
* 2) pcibios_init
* This probes the PCI bus(es) for any attached hardware. It's
* called by subsys_initcall. All of the real work is done by the
* generic Linux PCI layer.
*
*/
/*
 * This flag tells if the platform is TILEmpower that needs
 * special configuration for the PLX switch chip.
 * It is set by tile_pci_init() when any discovered controller reports
 * plx_gen1.
 */
int __write_once tile_plx_gen1;
/* Per-controller state; the first num_controllers entries are valid. */
static struct pci_controller controllers[TILE_NUM_PCIE];
/* Number of controllers successfully set up by tile_pci_init(). */
static int num_controllers;
/* Forward declaration; the initialiser appears after the accessors. */
static struct pci_ops tile_cfg_ops;
/*
 * Resource alignment hook.  No extra alignment is imposed here, so the
 * resource's current start address is handed back unchanged; 'data',
 * 'size' and 'align' are not consulted.
 */
resource_size_t pcibios_align_resource(void *data, const struct resource *res,
			    resource_size_t size, resource_size_t align)
{
	resource_size_t start = res->start;

	return start;
}
EXPORT_SYMBOL(pcibios_align_resource);
/*
 * Open a FD to the hypervisor PCI device.
 *
 * controller_id is the controller number, config type is 0 or 1 for
 * config0 or config1 operations.
 *
 * Returns whatever hv_dev_open() returns; callers treat a negative value
 * as "could not open".
 *
 * The device name is built with a bounded snprintf(): two "%d" fields can
 * expand to more characters than the 32-byte buffer holds, so an unbounded
 * sprintf() is not safe for arbitrary arguments.
 */
static int __init tile_pcie_open(int controller_id, int config_type)
{
	char filename[32];

	snprintf(filename, sizeof(filename), "pcie/%d/config%d",
		 controller_id, config_type);

	return hv_dev_open((HV_VirtAddr)filename, 0);
}
/*
 * Get the IRQ numbers from the HV and set up the handlers for them.
 *
 * Opens "pcie/<controller_id>/ctl", reads the pcie_rc_config record from
 * it, stores the interrupt base in controller->irq_base, activates four
 * consecutive IRQs starting at that base, and records the plx_gen1 flag.
 *
 * Returns 0 on success, or -1 if the control device cannot be opened or
 * the read does not return a full pcie_rc_config.
 */
static int __init tile_init_irqs(int controller_id,
				 struct pci_controller *controller)
{
	char filename[32];
	int fd;
	int ret;
	int x;
	struct pcie_rc_config rc_config;

	snprintf(filename, sizeof(filename), "pcie/%d/ctl", controller_id);
	fd = hv_dev_open((HV_VirtAddr)filename, 0);
	if (fd < 0) {
		pr_err("PCI: hv_dev_open(%s) failed\n", filename);
		return -1;
	}

	ret = hv_dev_pread(fd, 0, (HV_VirtAddr)(&rc_config),
			   sizeof(rc_config), PCIE_RC_CONFIG_MASK_OFF);
	/* The fd is only needed for this one read. */
	hv_dev_close(fd);
	if (ret != sizeof(rc_config)) {
		/* sizeof() yields size_t, which is unsigned: use %zu. */
		pr_err("PCI: wanted %zu bytes, got %d\n",
		       sizeof(rc_config), ret);
		return -1;
	}

	/* Record irq_base so that we can map INTx to IRQ # later. */
	controller->irq_base = rc_config.intr;

	/* One IRQ per INTx line (A..D). */
	for (x = 0; x < 4; x++)
		tile_irq_activate(rc_config.intr + x,
				  TILE_IRQ_HW_CLEAR);

	if (rc_config.plx_gen1)
		controller->plx_gen1 = 1;

	return 0;
}
/*
 * First initialization entry point, called from setup_arch().
 *
 * Walks every possible controller index, opens the hypervisor config0,
 * config1 and mem devices for it, fetches its IRQ setup, and fills in the
 * next free pci_controller slot.  A controller whose config0 device cannot
 * be opened is treated as absent; any later failure closes the fds that
 * were opened and moves on to the next index.
 *
 * Returns the number of controllers discovered.
 */
int __init tile_pci_init(void)
{
	int i;

	pr_info("PCI: Searching for controllers...\n");

	/* Do any configuration we need before using the PCIe */
	for (i = 0; i < TILE_NUM_PCIE; i++) {
		struct pci_controller *controller;
		char name[32];
		int cfg0_fd;
		int cfg1_fd = -1;
		int mem_fd = -1;

		/* No config0 device means this controller doesn't exist. */
		cfg0_fd = tile_pcie_open(i, 0);
		if (cfg0_fd < 0)
			continue;

		cfg1_fd = tile_pcie_open(i, 1);
		if (cfg1_fd < 0) {
			pr_err("PCI: Couldn't open config fd to HV for controller %d\n",
			       i);
			goto err_cont;
		}

		sprintf(name, "pcie/%d/mem", i);
		mem_fd = hv_dev_open((HV_VirtAddr)name, 0);
		if (mem_fd < 0) {
			pr_err("PCI: Could not open mem fd to HV!\n");
			goto err_cont;
		}

		pr_info("PCI: Found PCI controller #%d\n", i);

		/* Slots are handed out densely, independent of 'i'. */
		controller = &controllers[num_controllers];
		if (tile_init_irqs(i, controller)) {
			pr_err("PCI: Could not initialize IRQs, aborting.\n");
			goto err_cont;
		}

		controller->ops = &tile_cfg_ops;
		controller->first_busno = 0;
		controller->last_busno = 0xff;
		controller->hv_mem_fd = mem_fd;
		controller->hv_cfg_fd[0] = cfg0_fd;
		controller->hv_cfg_fd[1] = cfg1_fd;
		controller->index = num_controllers;
		num_controllers++;
		continue;

err_cont:
		/* cfg0_fd is always valid by the time we can get here. */
		hv_dev_close(cfg0_fd);
		if (cfg1_fd >= 0)
			hv_dev_close(cfg1_fd);
		if (mem_fd >= 0)
			hv_dev_close(mem_fd);
	}

	/*
	 * Before using the PCIe, see if we need to do any platform-specific
	 * configuration, such as the PLX switch Gen 1 issue on TILEmpower.
	 */
	for (i = 0; i < num_controllers; i++) {
		if (controllers[i].plx_gen1)
			tile_plx_gen1 = 1;
	}

	return num_controllers;
}
/*
 * Translate a device's interrupt pin into an IRQ number.
 *
 * PCI numbers the pins 1..4; subtracting one gives a 0..3 offset that is
 * added to the owning controller's irq_base.  'slot' is not used.
 */
static int tile_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
{
	struct pci_controller *controller = dev->sysdata;
	int intx = pin - 1;

	return intx + controller->irq_base;
}
/*
 * Program one common max-payload / max-read-request setting into every
 * PCIe device.
 *
 * Pass 1 finds the smallest max-payload encoding advertised by any device
 * with a PCI Express capability (starting from Tile's own limit); pass 2
 * writes that encoding, together with a fixed read-request encoding, into
 * each such device's Device Control register.
 */
static void __init fixup_read_and_payload_sizes(void)
{
	struct pci_dev *dev;
	int smallest_max_payload = 0x1; /* Tile maxes out at 256 bytes. */
	int max_read_size = 0x2; /* Limit to 512 byte reads. */
	u16 new_values;

	/* Scan for the smallest maximum payload size. */
	for (dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, NULL);
	     dev != NULL;
	     dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) {
		int cap = pci_find_capability(dev, PCI_CAP_ID_EXP);
		int max_payload;
		u32 devcap;

		/* Not a PCIe device: nothing to look at. */
		if (cap == 0)
			continue;

		pci_read_config_dword(dev, cap + PCI_EXP_DEVCAP, &devcap);
		max_payload = devcap & PCI_EXP_DEVCAP_PAYLOAD;
		if (max_payload < smallest_max_payload)
			smallest_max_payload = max_payload;
	}

	/* Now, set the max_payload_size for all devices to that value. */
	new_values = (max_read_size << 12) | (smallest_max_payload << 5);
	for (dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, NULL);
	     dev != NULL;
	     dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) {
		int cap = pci_find_capability(dev, PCI_CAP_ID_EXP);
		u16 devctl;

		if (cap == 0)
			continue;

		/* Replace only the payload and read-request fields. */
		pci_read_config_word(dev, cap + PCI_EXP_DEVCTL, &devctl);
		devctl &= ~(PCI_EXP_DEVCTL_PAYLOAD | PCI_EXP_DEVCTL_READRQ);
		devctl |= new_values;
		pci_write_config_word(dev, cap + PCI_EXP_DEVCTL, devctl);
	}
}
/*
 * Second PCI initialization entry point, called by subsys_initcall.
 *
 * The controllers have been set up by the time we get here, by a call to
 * tile_pci_init.
 *
 * Scans the bus behind every recorded controller, routes interrupts,
 * assigns resources, normalises the PCIe payload/read sizes, and finally
 * copies the first bridge's three bus resources into the controller
 * structure.
 *
 * A controller whose bus scan yields no bus is reported and skipped
 * rather than dereferenced; likewise a host bridge without a subordinate
 * bus simply leaves mem_resources[] untouched.
 *
 * Always returns 0.
 */
static int __init pcibios_init(void)
{
	int i;

	pr_info("PCI: Probing PCI hardware\n");

	/*
	 * Delay a bit in case devices aren't ready.  Some devices are
	 * known to require at least 20ms here, but we use a more
	 * conservative value.
	 */
	mdelay(250);

	/* Scan all of the recorded PCI controllers. */
	for (i = 0; i < num_controllers; i++) {
		struct pci_controller *controller = &controllers[i];
		struct pci_bus *bus;

		pr_info("PCI: initializing controller #%d\n", i);

		/*
		 * This comes from the generic Linux PCI driver.
		 *
		 * It reads the PCI tree for this bus into the Linux
		 * data structures.
		 *
		 * This is inlined in linux/pci.h and calls into
		 * pci_scan_bus_parented() in probe.c.
		 */
		bus = pci_scan_bus(0, controller->ops, controller);
		controller->root_bus = bus;
		if (!bus) {
			/* Nothing was created for this controller. */
			pr_err("PCI: bus scan failed for controller #%d\n", i);
			continue;
		}
		controller->last_busno = bus->subordinate;
	}

	/* Do machine dependent PCI interrupt routing */
	pci_fixup_irqs(pci_common_swizzle, tile_map_irq);

	/*
	 * This comes from the generic Linux PCI driver.
	 *
	 * It allocates all of the resources (I/O memory, etc)
	 * associated with the devices read in above.
	 */
	pci_assign_unassigned_resources();

	/* Configure the max_read_size and max_payload_size values. */
	fixup_read_and_payload_sizes();

	/* Record the I/O resources in the PCI controller structure. */
	for (i = 0; i < num_controllers; i++) {
		struct pci_bus *root_bus = controllers[i].root_bus;
		struct pci_bus *next_bus;
		struct pci_dev *dev;

		/* The scan above failed for this controller. */
		if (!root_bus)
			continue;

		list_for_each_entry(dev, &root_bus->devices, bus_list) {
			/* Find the PCI host controller, ie. the 1st bridge. */
			if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI ||
			    PCI_SLOT(dev->devfn) != 0)
				continue;

			next_bus = dev->subordinate;
			if (next_bus) {
				controllers[i].mem_resources[0] =
					*next_bus->resource[0];
				controllers[i].mem_resources[1] =
					*next_bus->resource[1];
				controllers[i].mem_resources[2] =
					*next_bus->resource[2];
			}
			break;
		}
	}

	return 0;
}
subsys_initcall(pcibios_init);
/*
 * Per-bus fixup hook.  This platform has no bus fixups to apply, so the
 * body is intentionally empty.
 */
void __devinit pcibios_fixup_bus(struct pci_bus *bus)
{
	/* Intentionally empty. */
}
/*
 * Option-string hook that the generic PCI layer may call.  No option is
 * consumed here: the string is handed straight back to the caller.
 */
char __devinit *pcibios_setup(char *str)
{
	/* Nothing to parse; return the argument untouched. */
	return str;
}
/*
 * Called from the generic Linux layer: store 'irq' in the device's
 * PCI_INTERRUPT_LINE config register.
 */
void __init pcibios_update_irq(struct pci_dev *dev, int irq)
{
	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
}
/*
 * Enable memory and/or I/O address decoding, as appropriate, for the
 * device described by 'dev'.
 *
 * Called from the generic PCI layer for both bridges and endpoints.
 * Bridges get both decoders switched on; endpoints get only the decoders
 * that match the types of their first six resources.  The 'mask' argument
 * is not consulted.
 *
 * Returns 0 on success, or -EINVAL if an endpoint resource is still
 * flagged IORESOURCE_UNSET.
 */
int pcibios_enable_device(struct pci_dev *dev, int mask)
{
	u16 cmd, old_cmd;
	u8 header_type;
	int bar;

	pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
	pci_read_config_word(dev, PCI_COMMAND, &cmd);
	old_cmd = cmd;

	if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
		/* Bridges: enable both memory and I/O decoding in all cases. */
		cmd |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY;
	} else {
		/*
		 * Endpoints: enable a decoder only if the device has a
		 * resource of the matching type.
		 */
		for (bar = 0; bar < 6; bar++) {
			struct resource *r = &dev->resource[bar];

			if (r->flags & IORESOURCE_UNSET) {
				pr_err("PCI: Device %s not available "
				       "because of resource collisions\n",
				       pci_name(dev));
				return -EINVAL;
			}
			if (r->flags & IORESOURCE_IO)
				cmd |= PCI_COMMAND_IO;
			if (r->flags & IORESOURCE_MEM)
				cmd |= PCI_COMMAND_MEMORY;
		}
	}

	/* Skip the config write when nothing changed. */
	if (cmd != old_cmd)
		pci_write_config_word(dev, PCI_COMMAND, cmd);

	return 0;
}
/*
 * Produce an __iomem cookie for BAR 'bar' of 'dev'.
 *
 * No mapping is created: the cookie is simply the resource's start
 * address.  Because nothing is mapped, 'max' (the caller's upper bound on
 * the length) has no effect on the result.
 *
 * Returns NULL if the BAR has zero length or is not a memory resource
 * (the latter is also logged); otherwise the BAR start address cast to an
 * __iomem pointer.
 */
void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max)
{
	unsigned long start = pci_resource_start(dev, bar);
	unsigned long len = pci_resource_len(dev, bar);
	unsigned long flags = pci_resource_flags(dev, bar);

	if (!len)
		return NULL;

	if (!(flags & IORESOURCE_MEM)) {
		pr_info("PCI: Trying to map invalid resource %#lx\n", flags);
		return NULL;
	}

	return (void __iomem *)start;
}
EXPORT_SYMBOL(pci_iomap);
/****************************************************************
*
* Tile PCI config space read/write routines
*
****************************************************************/
/*
 * Normal config-space read op, reached through the
 * pci_bus_read_config_byte() family of generated accessors.
 *
 * devfn is the combined PCI slot & function.
 *
 * offset is in bytes, from the start of config space for the
 * specified bus & slot.
 *
 * Bus 0 is reached with config0 accesses because there is no bridge
 * between the Tile and bus 0; any other bus implies a bridge was found
 * and uses config1.  Only slot 0 exists on bus 0, so other slots there
 * read back as all-ones with a return value of 0.
 *
 * Otherwise the return value is passed through from hv_dev_pread().
 * NOTE(review): confirm that value matches what the generic PCI layer
 * expects from a read op.
 */
static int __devinit tile_cfg_read(struct pci_bus *bus,
				   unsigned int devfn,
				   int offset,
				   int size,
				   u32 *val)
{
	struct pci_controller *controller = bus->sysdata;
	int busnum = bus->number & 0xff;
	int slot = (devfn >> 3) & 0x1f;
	int function = devfn & 0x7;
	int config_mode;
	u32 addr;

	if (busnum != 0) {
		config_mode = 1;
	} else {
		/* Fake an empty slot for bus 0, slot > 0. */
		if (slot != 0) {
			*val = 0xFFFFFFFF;
			return 0;
		}
		config_mode = 0;
	}

	addr = (busnum << 20) |		/* Bus in 27:20 */
	       (slot << 15) |		/* Slot (device) in 19:15 */
	       (function << 12) |	/* Function is in 14:12 */
	       (offset & 0xFFF);	/* byte address in 0:11 */

	return hv_dev_pread(controller->hv_cfg_fd[config_mode], 0,
			    (HV_VirtAddr)(val), size, addr);
}
/*
 * Config-space write op; the addressing rules are the same as in
 * tile_cfg_read().
 * Note that "val" is the value to write, not a pointer to that value.
 *
 * Writes aimed at bus 0, slot > 0 are silently dropped (return 0) since
 * only slot 0 exists there.  Otherwise the return value is passed through
 * from hv_dev_pwrite().
 */
static int __devinit tile_cfg_write(struct pci_bus *bus,
				    unsigned int devfn,
				    int offset,
				    int size,
				    u32 val)
{
	struct pci_controller *controller = bus->sysdata;
	HV_VirtAddr valp = (HV_VirtAddr)&val;
	int busnum = bus->number & 0xff;
	int slot = (devfn >> 3) & 0x1f;
	int function = devfn & 0x7;
	int config_mode;
	u32 addr;

	if (busnum != 0) {
		config_mode = 1;
	} else if (slot == 0) {
		/* For bus 0 slot 0 we use config 0 accesses. */
		config_mode = 0;
	} else {
		/* Fake an empty slot for bus 0, slot > 0. */
		return 0;
	}

	addr = (busnum << 20) |		/* Bus in 27:20 */
	       (slot << 15) |		/* Slot (device) in 19:15 */
	       (function << 12) |	/* Function is in 14:12 */
	       (offset & 0xFFF);	/* byte address in 0:11 */

#ifdef __BIG_ENDIAN
	/* Point to the correct part of the 32-bit "val". */
	valp += 4 - size;
#endif

	return hv_dev_pwrite(controller->hv_cfg_fd[config_mode], 0,
			     valp, size, addr);
}
/*
 * Config-space accessors for Tile PCI controllers.  tile_pci_init()
 * stores a pointer to this table in each controller's 'ops' field, and
 * pcibios_init() passes that pointer to pci_scan_bus().
 */
static struct pci_ops tile_cfg_ops = {
.read = tile_cfg_read,
.write = tile_cfg_write,
};
/*
* In the following, each PCI controller's mem_resources[1]
* represents its (non-prefetchable) PCI memory resource.
* mem_resources[0] and mem_resources[2] refer to its PCI I/O and
* prefetchable PCI memory resources, respectively.
* For more details, see pci_setup_bridge() in setup-bus.c.
* By comparing the target PCI memory address against the
* end address of controller 0, we can determine the controller
* that should accept the PCI memory access.
*/
#define TILE_READ(size, type) \
type _tile_read##size(unsigned long addr) \
{ \
type val; \
int idx = 0; \
if (addr > controllers[0].mem_resources[1].end && \
addr > controllers[0].mem_resources[2].end) \
idx = 1; \
if (hv_dev_pread(controllers[idx].hv_mem_fd, 0, \
(HV_VirtAddr)(&val), sizeof(type), addr)) \
pr_err("PCI: read %zd bytes at 0x%lX failed\n", \
sizeof(type), addr); \
return val; \
} \
EXPORT_SYMBOL(_tile_read##size)
TILE_READ(b, u8);
TILE_READ(w, u16);
TILE_READ(l, u32);
TILE_READ(q, u64);
/*
 * TILE_WRITE(size, type) defines and exports _tile_write<size>(), which
 * writes the sizeof(type) bytes of 'val' to PCI address 'addr' through a
 * controller's hypervisor mem fd.  Controller 1 is used when 'addr' lies
 * above both mem_resources[1].end and mem_resources[2].end of controller
 * 0; otherwise controller 0 is used.  A nonzero hv_dev_pwrite() result is
 * logged as a failure.
 *
 * sizeof() yields size_t, which is unsigned, hence %zu in the message.
 */
#define TILE_WRITE(size, type)						\
void _tile_write##size(type val, unsigned long addr)			\
{									\
	int idx = 0;							\
	if (addr > controllers[0].mem_resources[1].end &&		\
	    addr > controllers[0].mem_resources[2].end)			\
		idx = 1;						\
	if (hv_dev_pwrite(controllers[idx].hv_mem_fd, 0,		\
			  (HV_VirtAddr)(&val), sizeof(type), addr))	\
		pr_err("PCI: write %zu bytes at 0x%lX failed\n",	\
		       sizeof(type), addr);				\
}									\
EXPORT_SYMBOL(_tile_write##size)

TILE_WRITE(b, u8);
TILE_WRITE(w, u16);
TILE_WRITE(l, u32);
TILE_WRITE(q, u64);
...@@ -2945,6 +2945,18 @@ source "drivers/s390/net/Kconfig" ...@@ -2945,6 +2945,18 @@ source "drivers/s390/net/Kconfig"
source "drivers/net/caif/Kconfig" source "drivers/net/caif/Kconfig"
config TILE_NET
tristate "Tilera GBE/XGBE network driver support"
depends on TILE
default y
select CRC32
help
This is a standard Linux network device driver for the
on-chip Tilera Gigabit Ethernet and XAUI interfaces.
To compile this driver as a module, choose M here: the module
will be called tile_net.
config XEN_NETDEV_FRONTEND config XEN_NETDEV_FRONTEND
tristate "Xen network device frontend driver" tristate "Xen network device frontend driver"
depends on XEN depends on XEN
......
...@@ -301,3 +301,4 @@ obj-$(CONFIG_CAIF) += caif/ ...@@ -301,3 +301,4 @@ obj-$(CONFIG_CAIF) += caif/
obj-$(CONFIG_OCTEON_MGMT_ETHERNET) += octeon/ obj-$(CONFIG_OCTEON_MGMT_ETHERNET) += octeon/
obj-$(CONFIG_PCH_GBE) += pch_gbe/ obj-$(CONFIG_PCH_GBE) += pch_gbe/
obj-$(CONFIG_TILE_NET) += tile/
#
# Makefile for the TILE on-chip networking support.
#
obj-$(CONFIG_TILE_NET) += tile_net.o
ifdef CONFIG_TILEGX
tile_net-objs := tilegx.o mpipe.o iorpc_mpipe.o dma_queue.o
else
tile_net-objs := tilepro.o
endif
/*
* Copyright 2010 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/kernel.h> /* printk() */
#include <linux/slab.h> /* kmalloc() */
#include <linux/errno.h> /* error codes */
#include <linux/types.h> /* size_t */
#include <linux/interrupt.h>
#include <linux/in.h>
#include <linux/netdevice.h> /* struct device, and other headers */
#include <linux/etherdevice.h> /* eth_type_trans */
#include <linux/skbuff.h>
#include <linux/ioctl.h>
#include <linux/cdev.h>
#include <linux/hugetlb.h>
#include <linux/in6.h>
#include <linux/timer.h>
#include <linux/io.h>
#include <asm/checksum.h>
#include <asm/homecache.h>
#include <hv/drv_xgbe_intf.h>
#include <hv/drv_xgbe_impl.h>
#include <hv/hypervisor.h>
#include <hv/netio_intf.h>
/* For TSO */
#include <linux/ip.h>
#include <linux/tcp.h>
/* There is no singlethread_cpu, so schedule work on the current cpu. */
#define singlethread_cpu -1
/*
* First, "tile_net_init_module()" initializes all four "devices" which
* can be used by linux.
*
* Then, "ifconfig DEVICE up" calls "tile_net_open()", which analyzes
* the network cpus, then uses "tile_net_open_aux()" to initialize
* LIPP/LEPP, and then uses "tile_net_open_inner()" to register all
* the tiles, provide buffers to LIPP, allow ingress to start, and
* turn on hypervisor interrupt handling (and NAPI) on all tiles.
*
* If registration fails due to the link being down, then "retry_work"
* is used to keep calling "tile_net_open_inner()" until it succeeds.
*
* If "ifconfig DEVICE down" is called, it uses "tile_net_stop()" to
* stop egress, drain the LIPP buffers, unregister all the tiles, stop
* LIPP/LEPP, and wipe the LEPP queue.
*
* We start out with the ingress interrupt enabled on each CPU. When
* this interrupt fires, we disable it, and call "napi_schedule()".
* This will cause "tile_net_poll()" to be called, which will pull
* packets from the netio queue, filtering them out, or passing them
* to "netif_receive_skb()". If our budget is exhausted, we will
* return, knowing we will be called again later. Otherwise, we
* reenable the ingress interrupt, and call "napi_complete()".
*
*
* NOTE: The use of "native_driver" ensures that EPP exists, and that
* "epp_sendv" is legal, and that "LIPP" is being used.
*
* NOTE: Failing to free completions for an arbitrarily long time
* (which is defined to be illegal) does in fact cause bizarre
* problems. The "egress_timer" helps prevent this from happening.
*
* NOTE: The egress code can be interrupted by the interrupt handler.
*/
/* HACK: Allow use of "jumbo" packets. */
/* This should be 1500 if "jumbo" is not set in LIPP. */
/* This should be at most 10226 (10240 - 14) if "jumbo" is set in LIPP. */
/* ISSUE: This has not been thoroughly tested (except at 1500). */
#define TILE_NET_MTU 1500
/* HACK: Define to support GSO. */
/* ISSUE: This may actually hurt performance of the TCP blaster. */
/* #define TILE_NET_GSO */
/* Define this to collapse "duplicate" acks. */
/* #define IGNORE_DUP_ACKS */
/* HACK: Define this to verify incoming packets. */
/* #define TILE_NET_VERIFY_INGRESS */
/* Use 3000 to enable the Linux Traffic Control (QoS) layer, else 0. */
#define TILE_NET_TX_QUEUE_LEN 0
/* Define to dump packets (prints out the whole packet on tx and rx). */
/* #define TILE_NET_DUMP_PACKETS */
/* Define to enable debug spew (all PDEBUG's are enabled). */
/* #define TILE_NET_DEBUG */
/* Define to activate paranoia checks. */
/* #define TILE_NET_PARANOIA */
/* Default transmit lockup timeout period, in jiffies. */
#define TILE_NET_TIMEOUT (5 * HZ)
/* Default retry interval for bringing up the NetIO interface, in jiffies. */
#define TILE_NET_RETRY_INTERVAL (5 * HZ)
/* Number of ports (xgbe0, xgbe1, gbe0, gbe1). */
#define TILE_NET_DEVS 4
/* Paranoia. */
#if NET_IP_ALIGN != LIPP_PACKET_PADDING
#error "NET_IP_ALIGN must match LIPP_PACKET_PADDING."
#endif
/* Debug print. */
#ifdef TILE_NET_DEBUG
#define PDEBUG(fmt, args...) net_printk(fmt, ## args)
#else
#define PDEBUG(fmt, args...)
#endif
MODULE_AUTHOR("Tilera");
MODULE_LICENSE("GPL");
#define IS_MULTICAST(mac_addr) \
(((u8 *)(mac_addr))[0] & 0x01)
#define IS_BROADCAST(mac_addr) \
(((u16 *)(mac_addr))[0] == 0xffff)
/*
 * Queue of incoming packets for a specific cpu and device.
 *
 * Includes a pointer to the "system" data, and the actual "user" data.
 */
struct tile_netio_queue {
/* "System" part; the pointer is obtained from the hypervisor in tile_net_register(). */
netio_queue_impl_t *__system_part;
/* "User" part, stored inline; zeroed and set up in tile_net_register(). */
netio_queue_user_impl_t __user_part;
};
/*
 * Statistics counters for a specific cpu and device.
 */
struct tile_net_stats_t {
/* Packets passed up the stack by tile_net_poll_aux(). */
u32 rx_packets;
/* Bytes in those packets. */
u32 rx_bytes;
/* Transmit-side packet counter. */
u32 tx_packets;
/* Transmit-side byte counter. */
u32 tx_bytes;
};
/*
 * Info for a specific cpu and device.
 *
 * tile_net_register() stores a pointer to the current cpu's instance
 * in "priv->cpu[]".
 *
 * ISSUE: There is a "dev" pointer in "napi" as well.
 */
struct tile_net_cpu {
/* The NAPI struct. */
struct napi_struct napi;
/* Packet queue. */
struct tile_netio_queue queue;
/* Statistics. */
struct tile_net_stats_t stats;
/* ISSUE: Is this needed? */
/* Set by tile_net_enable_intr(), cleared by tile_net_disable_intr(). */
bool napi_enabled;
/* True if this tile has successfully registered with the IPP. */
bool registered;
/* True if the link was down last time we tried to register. */
bool link_down;
/* True if "egress_timer" is scheduled. */
bool egress_timer_scheduled;
/* Number of small sk_buffs which must still be provided. */
unsigned int num_needed_small_buffers;
/* Number of large sk_buffs which must still be provided. */
unsigned int num_needed_large_buffers;
/* A timer for handling egress completions. */
struct timer_list egress_timer;
};
/*
 * Info for a specific device.
 *
 * This is the structure returned by netdev_priv() for each tile_net
 * device.
 */
struct tile_net_priv {
/* Our network device. */
struct net_device *dev;
/* The actual egress queue. */
lepp_queue_t *epp_queue;
/* Protects "epp_queue->cmd_tail" and "epp_queue->comp_tail" */
spinlock_t cmd_lock;
/* Protects "epp_queue->comp_head". */
spinlock_t comp_lock;
/* The hypervisor handle for this interface. */
int hv_devhdl;
/* The intr bit mask that IDs this device. */
u32 intr_id;
/* True iff "tile_net_open_aux()" has succeeded. */
/* Cleared again by "tile_net_stop_aux()". */
int partly_opened;
/* True iff "tile_net_open_inner()" has succeeded. */
int fully_opened;
/* Effective network cpus. */
struct cpumask network_cpus_map;
/* Number of network cpus. */
int network_cpus_count;
/* Credits per network cpu. */
int network_cpus_credits;
/* Network stats. */
struct net_device_stats stats;
/* For NetIO bringup retries. */
struct delayed_work retry_work;
/* Quick access to per cpu data. */
/* Each entry is filled in by "tile_net_register()" on that cpu. */
struct tile_net_cpu *cpu[NR_CPUS];
};
/*
* The actual devices (xgbe0, xgbe1, gbe0, gbe1).
*/
static struct net_device *tile_net_devs[TILE_NET_DEVS];
/*
* The "tile_net_cpu" structures for each device.
*/
static DEFINE_PER_CPU(struct tile_net_cpu, hv_xgbe0);
static DEFINE_PER_CPU(struct tile_net_cpu, hv_xgbe1);
static DEFINE_PER_CPU(struct tile_net_cpu, hv_gbe0);
static DEFINE_PER_CPU(struct tile_net_cpu, hv_gbe1);
/*
* True if "network_cpus" was specified.
*/
static bool network_cpus_used;
/*
* The actual cpus in "network_cpus".
*/
static struct cpumask network_cpus_map;
#ifdef TILE_NET_DEBUG
/*
 * printk with extra stuff.
 *
 * The message is prefixed with "tile_net[NN]: ", where NN is the cpu
 * we are running on, and is emitted at "notice" level.
 *
 * "fmt" and the following arguments are printf-style.  The text is
 * built in a static 256 byte buffer, so over-long messages are
 * truncated.
 *
 * NOTE(review): the buffer is static and unlocked, so concurrent
 * callers can presumably garble each other's output; this is only
 * compiled when TILE_NET_DEBUG is defined.
 */
static void net_printk(char *fmt, ...)
{
	int len;
	va_list args;
	static char buf[256];

	len = sprintf(buf, "tile_net[%2.2d]: ", smp_processor_id());
	va_start(args, fmt);
	vscnprintf(buf + len, sizeof(buf) - len - 1, fmt, args);
	va_end(args);
	buf[255] = '\0';

	/*
	 * The already-formatted text must be passed as an argument, never
	 * as the format itself: it may contain '%' characters, and
	 * pr_notice() needs a string literal for its format.
	 */
	pr_notice("%s", buf);
}
#endif
#ifdef TILE_NET_DUMP_PACKETS
/*
 * Dump a packet.
 *
 * Prints a header line naming "data", "length", the tag "s" and a
 * running call count, then a hex dump of "length" bytes starting at
 * "data", sixteen bytes per line, each line prefixed by its offset.
 */
static void dump_packet(unsigned char *data, unsigned long length, char *s)
{
	unsigned long i;
	static unsigned int count;
	/*
	 * One output line: an offset (at most 16 hex digits), a colon, and
	 * sixteen " xx" groups -- comfortably under 128 bytes.
	 */
	char buf[128];

	pr_info("dump_packet(data %p, length 0x%lx s %s count 0x%x)\n",
		data, length, s, count++);

	pr_info("\n");

	for (i = 0; i < length; i++) {
		/* Start a fresh line every sixteen bytes. */
		if ((i & 0xf) == 0)
			sprintf(buf, "%8.8lx:", i);
		sprintf(buf + strlen(buf), " %2.2x", data[i]);
		/* Flush at the end of a line, or at the end of the packet. */
		if ((i & 0xf) == 0xf || i == length - 1)
			pr_info("%s\n", buf);
	}
}
#endif
/*
 * Provide support for the __netio_fastio1() swint
 * (see <hv/drv_xgbe_intf.h> for how it is used).
 *
 * The fastio swint2 call may clobber all the caller-saved registers.
 * It rarely clobbers memory, but we allow for the possibility in
 * the signature just to be on the safe side.
 *
 * Also, gcc doesn't seem to allow an input operand to be
 * clobbered, so we fake it with dummy outputs.
 *
 * This function can't be static because of the way it is declared
 * in the netio header.
 *
 * Per the operand constraints below, "fastio_index" is passed via the
 * "R10" constraint and "arg0" via "R01"; the value returned is the
 * "R00" output.
 */
inline int __netio_fastio1(u32 fastio_index, u32 arg0)
{
/* "clobber_r1" and "clobber_r10" are the dummy outputs mentioned above. */
long result, clobber_r1, clobber_r10;
asm volatile("swint2"
: "=R00" (result),
"=R01" (clobber_r1), "=R10" (clobber_r10)
: "R10" (fastio_index), "R01" (arg0)
: "memory", "r2", "r3", "r4",
"r5", "r6", "r7", "r8", "r9",
"r11", "r12", "r13", "r14",
"r15", "r16", "r17", "r18", "r19",
"r20", "r21", "r22", "r23", "r24",
"r25", "r26", "r27", "r28", "r29");
return result;
}
/*
 * Hand a single linux buffer to LIPP.
 *
 * "va" is the kernel virtual address of the buffer, and "small" says
 * whether it is a small (true) or large (false) buffer.
 */
static void tile_net_provide_linux_buffer(struct tile_net_cpu *info,
					   void *va, bool small)
{
	/*
	 * Build the "linux_buffer_t": the physical address with its low
	 * seven bits dropped, shifted up by one, with "small" in bit zero.
	 */
	unsigned int pa_bits = (unsigned int)(__pa(va) >> 7);
	unsigned int buffer = (pa_bits << 1) + small;

	__netio_fastio_free_buffer(info->queue.__user_part.__fastio_index,
				   buffer);
}
/*
 * Allocate one sk_buff and provide its data area to LIPP.
 *
 * "small" selects a small (true) or large (false) buffer.  Returns
 * false if the sk_buff could not be allocated, else true.
 *
 * The word just below the address given to LIPP holds a back-pointer
 * to the owning sk_buff, so that the sk_buff can be recovered when the
 * buffer comes back.
 */
static bool tile_net_provide_needed_buffer(struct tile_net_cpu *info,
					   bool small)
{
	/* ISSUE: What should we use here? */
	unsigned int large_size = NET_IP_ALIGN + TILE_NET_MTU + 100;

	unsigned int wanted = small ? LIPP_SMALL_PACKET_SIZE : large_size;

	/* Round up to whole L2 lines, to avoid "false sharing". */
	unsigned int buffer_size =
		(wanted + CHIP_L2_LINE_SIZE() - 1) & -CHIP_L2_LINE_SIZE();

	/*
	 * ISSUE: CPAs are 38 bits, but a "linux_buffer_t" only encodes
	 * the high 31 of them, so the low 7 bits must be zero, which
	 * means the "va" handed to LIPP must be 128-byte aligned.
	 */
	const unsigned long align = 128;

	/*
	 * Room for the back-pointer, worst-case alignment slop, and the
	 * buffer itself.  Note that "dev_alloc_skb()" adds (and reserves)
	 * NET_SKB_PAD more bytes on top of this.
	 * ISSUE: Can we "share" the NET_SKB_PAD bytes with the back-pointer?
	 */
	int len = sizeof(struct sk_buff *) + align + buffer_size;

	struct sk_buff **backptr;
	struct sk_buff *skb;
	void *va;

	for (;;) {
		skb = dev_alloc_skb(len);
		if (skb == NULL)
			return false;

		/* Leave space for the back-pointer, then align "data". */
		skb_reserve(skb, sizeof(*backptr));
		skb_reserve(skb, -(long)skb->data & (align - 1));

		/* This is the address LIPP will be given. */
		va = skb->data;

		/* Small buffers need no further checking. */
		if (small)
			break;

		/*
		 * Large buffers must not span a huge page.
		 * ISSUE: A spanning buffer has never been observed!
		 */
		if (((((long)va & ~HPAGE_MASK) + 1535) & HPAGE_MASK) == 0)
			break;

		/* Abandon this sk_buff and try again with a new one. */
		pr_err("Leaking unaligned linux buffer at %p.\n", va);
	}

	/*
	 * Skip two bytes to satisfy LIPP assumptions; this also puts
	 * the IP header on a 16 byte boundary.
	 * ISSUE: Do this when the packet arrives?
	 */
	skb_reserve(skb, NET_IP_ALIGN);

	/* Record which sk_buff owns this buffer. */
	backptr = va - sizeof(*backptr);
	*backptr = skb;

	/* Invalidate the packet buffer unless using the default hash. */
	if (!hash_default)
		__inv_buffer(skb->data, buffer_size);

	/* Make sure the back-pointer has been flushed. */
	__insn_mf();

#ifdef TILE_NET_PARANOIA
#if CHIP_HAS_CBOX_HOME_MAP()
	if (hash_default) {
		HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)va);
		if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3)
			panic("Non-coherent ingress buffer!");
	}
#endif
#endif

	/* Finally, give the buffer to LIPP. */
	tile_net_provide_linux_buffer(info, va, small);

	return true;
}
/*
* Provide linux buffers for LIPP.
*/
static void tile_net_provide_needed_buffers(struct tile_net_cpu *info)
{
while (info->num_needed_small_buffers != 0) {
if (!tile_net_provide_needed_buffer(info, true))
goto oops;
info->num_needed_small_buffers--;
}
while (info->num_needed_large_buffers != 0) {
if (!tile_net_provide_needed_buffer(info, false))
goto oops;
info->num_needed_large_buffers--;
}
return;
oops:
/* Add a description to the page allocation failure dump. */
pr_notice("Could not provide a linux buffer to LIPP.\n");
}
/*
 * Collect up to "comps_size" LEPP completions into "comps", and return
 * how many were collected, so that the caller can free them.
 *
 * If "pending" is not NULL, "*pending" is set to true if completions
 * may still be outstanding afterwards, else false.
 *
 * The completion head is advanced under "priv->comp_lock".
 */
static unsigned int tile_net_lepp_grab_comps(struct net_device *dev,
					     struct sk_buff *comps[],
					     unsigned int comps_size,
					     bool *pending)
{
	struct tile_net_priv *priv = netdev_priv(dev);
	lepp_queue_t *eq = priv->epp_queue;
	unsigned int head, busy, tail;
	unsigned int n;

	spin_lock(&priv->comp_lock);

	/* Snapshot the queue indices once. */
	head = eq->comp_head;
	busy = eq->comp_busy;
	tail = eq->comp_tail;

	/* Take entries until we reach "busy" or fill the caller's array. */
	for (n = 0; head != busy && n < comps_size; n++) {
		comps[n] = eq->comps[head];
		LEPP_QINC(head);
	}

	if (pending != NULL)
		*pending = (head != tail);

	/* Publish the new head. */
	eq->comp_head = head;

	spin_unlock(&priv->comp_lock);

	return n;
}
/*
 * Arm the egress timer for the next jiffy, unless it is already armed.
 *
 * This is deliberately "schedule if not scheduled" rather than an
 * unconditional "reschedule", since rescheduling is fairly expensive.
 */
static void tile_net_schedule_egress_timer(struct tile_net_cpu *info)
{
	if (info->egress_timer_scheduled)
		return;

	mod_timer_pinned(&info->egress_timer, jiffies + 1);
	info->egress_timer_scheduled = true;
}
/*
 * Timer callback for "info->egress_timer"; "arg" is the owning
 * "struct tile_net_cpu".
 *
 * Frees up to 32 egress completions, and re-arms the timer for as long
 * as completions may still be pending (on behalf of any tile).
 *
 * ISSUE: Realistically, will the timer ever stop scheduling itself?
 *
 * ISSUE: This timer is almost never actually needed, so a single
 * global timer that can run on any tile might do instead.
 *
 * ISSUE: Maybe track the number of expected completions instead, free
 * only that many, and reset to zero whenever "pending" is false.
 */
static void tile_net_handle_egress_timer(unsigned long arg)
{
	struct tile_net_cpu *info = (struct tile_net_cpu *)arg;
	struct sk_buff *done[32];
	struct sk_buff **p;
	unsigned int count;
	bool more;

	/* We are running, so we are no longer scheduled. */
	info->egress_timer_scheduled = false;

	count = tile_net_lepp_grab_comps(info->napi.dev, done, 32, &more);

	for (p = done; p != done + count; p++)
		kfree_skb(*p);

	/* Keep going while anything might still be outstanding. */
	if (more)
		tile_net_schedule_egress_timer(info);
}
#ifdef IGNORE_DUP_ACKS
/*
 * Decide whether two packets are "duplicate" ACKs of each other.
 *
 * These are sequential packets (for a given flow) which are exactly 66
 * bytes long and share everything except ID=2@0x12, Hsum=2@0x18,
 * Ack=4@0x2a, WinSize=2@0x30 and Csum=2@0x32 (Tstamps=10@0x38 are
 * usually identical anyway).
 *
 * Returns true if the first "len" bytes of "s1" and "s2" differ only
 * at those ignorable offsets, else false.
 *
 * NOTE: Apparently truly duplicate acks (identical "ack" values)
 * should not be collapsed, as they are used for some kind of flow
 * control; see TILE_NET_NO_SUPPRESS_DUP_ACKS below.
 */
static bool is_dup_ack(char *s1, char *s2, unsigned int len)
{
	/* Byte offsets at which the two packets are allowed to differ. */
	static const unsigned char may_differ[] = {
		0x12, 0x13,			/* Identification. */
		0x18, 0x19,			/* Header checksum. */
		0x2a, 0x2b, 0x2c, 0x2d,		/* ACK. */
		0x30, 0x31,			/* WinSize. */
		0x32, 0x33,			/* Checksum. */
	};
	unsigned long long ignorable = 0;
	unsigned int k;
	int i;

	/* One bit per ignorable offset; bit 0 is byte 0 of the packet. */
	for (k = 0; k < sizeof(may_differ); k++)
		ignorable |= 1ULL << may_differ[k];

	for (i = 0; i < len; i++, ignorable >>= 1) {
		if (!(ignorable & 1) && s1[i] != s2[i]) {
#ifdef TILE_NET_DEBUG
			/* HACK: Mention non-timestamp diffs. */
			if (i < 0x38 && i != 0x2f &&
			    net_ratelimit())
				pr_info("Diff at 0x%x\n", i);
#endif
			return false;
		}
	}

#ifdef TILE_NET_NO_SUPPRESS_DUP_ACKS
	/* HACK: Do not suppress truly duplicate ACKs. */
	/* ISSUE: Is this actually necessary or helpful? */
	if (s1[0x2a] == s2[0x2a] &&
	    s1[0x2b] == s2[0x2b] &&
	    s1[0x2c] == s2[0x2c] &&
	    s1[0x2d] == s2[0x2d]) {
		return false;
	}
#endif

	return true;
}
#endif
/*
 * Like "tile_net_handle_packets()", but just discard packets.
 *
 * Walks the current cpu's receive queue for "dev" until it is empty,
 * freeing the sk_buff behind every packet and consuming the packet.
 */
static void tile_net_discard_packets(struct net_device *dev)
{
	struct tile_net_priv *priv = netdev_priv(dev);
	struct tile_net_cpu *info = priv->cpu[smp_processor_id()];
	netio_queue_impl_t *qsp = info->queue.__system_part;
	netio_queue_user_impl_t *qup = &info->queue.__user_part;

	while (qup->__packet_receive_read !=
	       qsp->__packet_receive_queue.__packet_write) {

		int cur = qup->__packet_receive_read;
		int next = cur + sizeof(netio_pkt_t);

		/* Packet descriptors live right after the system part. */
		netio_pkt_t *pkt = (netio_pkt_t *)
			((unsigned long) &qsp[1] + cur);

		/* The "linux_buffer_t" for this packet. */
		unsigned int buffer = pkt->__packet.word;

		/* Turn the "linux_buffer_t" back into a "va". */
		void *va = __va((phys_addr_t)(buffer >> 1) << 7);

		/* The owning sk_buff is stored just below "va". */
		struct sk_buff **owner = va - sizeof(*owner);

		kfree_skb(*owner);

		/* Wrap around at the end of the queue. */
		if (next == qsp->__packet_receive_queue.__last_packet_plus_one)
			next = 0;

		/* Consume this packet. */
		qup->__packet_receive_read = next;
	}
}
/*
 * Handle the next packet.  Return true if "processed", false if "filtered".
 *
 * "index" is the byte offset of the packet descriptor within the
 * receive queue that follows the queue's "system part".
 *
 * A filtered packet has its buffer handed straight back to LIPP.  A
 * processed packet is passed to "netif_receive_skb()", and a
 * replacement buffer of the same kind is recorded as needed.  Either
 * way the packet is consumed, and receive credits are returned each
 * time the credit countdown reaches zero.
 */
static bool tile_net_poll_aux(struct tile_net_cpu *info, int index)
{
	struct net_device *dev = info->napi.dev;

	struct tile_netio_queue *queue = &info->queue;
	netio_queue_impl_t *qsp = queue->__system_part;
	netio_queue_user_impl_t *qup = &queue->__user_part;
	struct tile_net_stats_t *stats = &info->stats;

	int filter;

	/* Offset of the following packet, wrapping at the end of the queue. */
	int index2_aux = index + sizeof(netio_pkt_t);
	int index2 =
		((index2_aux ==
		  qsp->__packet_receive_queue.__last_packet_plus_one) ?
		 0 : index2_aux);

	netio_pkt_t *pkt = (netio_pkt_t *)((unsigned long) &qsp[1] + index);

	netio_pkt_metadata_t *metadata = NETIO_PKT_METADATA(pkt);

	/* Extract the packet size. */
	unsigned long len =
		(NETIO_PKT_CUSTOM_LENGTH(pkt) +
		 NET_IP_ALIGN - NETIO_PACKET_PADDING);

	/* Extract the "linux_buffer_t". */
	unsigned int buffer = pkt->__packet.word;

	/* Extract "small" (vs "large"). */
	bool small = ((buffer & 1) != 0);

	/* Convert "linux_buffer_t" to "va". */
	void *va = __va((phys_addr_t)(buffer >> 1) << 7);

	/* Extract the packet data pointer. */
	/* Compare to "NETIO_PKT_CUSTOM_DATA(pkt)". */
	unsigned char *buf = va + NET_IP_ALIGN;

#ifdef IGNORE_DUP_ACKS

	/* Running totals, reported (rate-limited) below. */
	static int other;
	static int final;
	static int keep;
	static int skip;

#endif

	/* Invalidate the packet buffer. */
	if (!hash_default)
		__inv_buffer(buf, len);

	/* ISSUE: Is this needed? */
	dev->last_rx = jiffies;

#ifdef TILE_NET_DUMP_PACKETS
	dump_packet(buf, len, "rx");
#endif /* TILE_NET_DUMP_PACKETS */

#ifdef TILE_NET_VERIFY_INGRESS
	if (!NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt) &&
	    NETIO_PKT_L4_CSUM_CALCULATED_M(metadata, pkt)) {
		/*
		 * FIXME: This complains about UDP packets
		 * with a "zero" checksum (bug 6624).
		 */
#ifdef TILE_NET_PANIC_ON_BAD
		dump_packet(buf, len, "rx");
		panic("Bad L4 checksum.");
#else
		/* "len" is an unsigned long, so it must be printed as one. */
		pr_warning("Bad L4 checksum on %lu byte packet.\n", len);
#endif
	}
	if (!NETIO_PKT_L3_CSUM_CORRECT_M(metadata, pkt) &&
	    NETIO_PKT_L3_CSUM_CALCULATED_M(metadata, pkt)) {
		dump_packet(buf, len, "rx");
		panic("Bad L3 checksum.");
	}
	switch (NETIO_PKT_STATUS_M(metadata, pkt)) {
	case NETIO_PKT_STATUS_OVERSIZE:
		if (len >= 64) {
			dump_packet(buf, len, "rx");
			panic("Unexpected OVERSIZE.");
		}
		break;
	case NETIO_PKT_STATUS_BAD:
#ifdef TILE_NET_PANIC_ON_BAD
		dump_packet(buf, len, "rx");
		panic("Unexpected BAD packet.");
#else
		/* "len" is an unsigned long, so it must be printed as one. */
		pr_warning("Unexpected BAD %lu byte packet.\n", len);
#endif
	}
#endif

	filter = 0;

	if (!(dev->flags & IFF_UP)) {
		/* Filter packets received before we're up. */
		filter = 1;
	} else if (!(dev->flags & IFF_PROMISC)) {
		/*
		 * FIXME: Implement HW multicast filter.
		 */
		if (!IS_MULTICAST(buf) && !IS_BROADCAST(buf)) {
			/* Filter packets not for our address. */
			const u8 *mine = dev->dev_addr;
			filter = compare_ether_addr(mine, buf);
		}
	}

#ifdef IGNORE_DUP_ACKS

	if (len != 66) {
		/* FIXME: Must check "is_tcp_ack(buf, len)" somehow. */

		other++;

	} else if (index2 ==
		   qsp->__packet_receive_queue.__packet_write) {

		/* There is no following packet to compare against. */
		final++;

	} else {

		netio_pkt_t *pkt2 = (netio_pkt_t *)
			((unsigned long) &qsp[1] + index2);

		netio_pkt_metadata_t *metadata2 =
			NETIO_PKT_METADATA(pkt2);

		/* Extract the packet size. */
		unsigned long len2 =
			(NETIO_PKT_CUSTOM_LENGTH(pkt2) +
			 NET_IP_ALIGN - NETIO_PACKET_PADDING);

		if (len2 == 66 &&
		    NETIO_PKT_FLOW_HASH_M(metadata, pkt) ==
		    NETIO_PKT_FLOW_HASH_M(metadata2, pkt2)) {

			/* Extract the "linux_buffer_t". */
			unsigned int buffer2 = pkt2->__packet.word;

			/* Convert "linux_buffer_t" to "va". */
			void *va2 =
				__va((phys_addr_t)(buffer2 >> 1) << 7);

			/* Extract the packet data pointer. */
			/* Compare to "NETIO_PKT_CUSTOM_DATA(pkt)". */
			unsigned char *buf2 = va2 + NET_IP_ALIGN;

			/* Invalidate the packet buffer. */
			if (!hash_default)
				__inv_buffer(buf2, len2);

			if (is_dup_ack(buf, buf2, len)) {
				skip++;
				filter = 1;
			} else {
				keep++;
			}
		}
	}

	if (net_ratelimit())
		pr_info("Other %d Final %d Keep %d Skip %d.\n",
			other, final, keep, skip);

#endif

	if (filter) {

		/* ISSUE: Update "drop" statistics? */

		/* Give the untouched buffer straight back to LIPP. */
		tile_net_provide_linux_buffer(info, va, small);

	} else {

		/* Acquire the associated "skb". */
		struct sk_buff **skb_ptr = va - sizeof(*skb_ptr);
		struct sk_buff *skb = *skb_ptr;

		/* Paranoia. */
		if (skb->data != buf)
			panic("Corrupt linux buffer from LIPP! "
			      "VA=%p, skb=%p, skb->data=%p\n",
			      va, skb, skb->data);

		/* Encode the actual packet length. */
		skb_put(skb, len);

		/* NOTE: This call also sets "skb->dev = dev". */
		skb->protocol = eth_type_trans(skb, dev);

		/* ISSUE: Discard corrupt packets? */
		/* ISSUE: Discard packets with bad checksums? */

		/* Avoid recomputing TCP/UDP checksums. */
		if (NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt))
			skb->ip_summed = CHECKSUM_UNNECESSARY;

		netif_receive_skb(skb);

		stats->rx_packets++;
		stats->rx_bytes += len;

		/* The buffer went up the stack, so LIPP is owed a new one. */
		if (small)
			info->num_needed_small_buffers++;
		else
			info->num_needed_large_buffers++;
	}

	/* Return four credits after every fourth packet. */
	if (--qup->__receive_credit_remaining == 0) {
		u32 interval = qup->__receive_credit_interval;
		qup->__receive_credit_remaining = interval;
		__netio_fastio_return_credits(qup->__fastio_index, interval);
	}

	/* Consume this packet. */
	qup->__packet_receive_read = index2;

	return !filter;
}
/*
 * NAPI poll routine: handle packets for the given device on the
 * current cpu, and return how many were processed (filtered packets
 * are not counted).
 *
 * If the queue is drained before "budget" packets have been processed,
 * NAPI is completed and the hypervisor interrupt is re-enabled.
 * Otherwise we return as soon as the budget is used up.  In either
 * case, LIPP is then given any buffers it is owed.
 *
 * ISSUE: The "rotting packet" race condition occurs if a packet
 * arrives after the queue appears to be empty, and before the
 * hypervisor interrupt is re-enabled.
 */
static int tile_net_poll(struct napi_struct *napi, int budget)
{
	struct net_device *dev = napi->dev;
	struct tile_net_priv *priv = netdev_priv(dev);
	struct tile_net_cpu *info = priv->cpu[smp_processor_id()];
	netio_queue_impl_t *qsp = info->queue.__system_part;
	netio_queue_user_impl_t *qup = &info->queue.__user_part;
	unsigned int work = 0;

	for (;;) {
		int index = qup->__packet_receive_read;

		if (index == qsp->__packet_receive_queue.__packet_write) {
			/* Queue drained within budget. */
			napi_complete(&info->napi);

			/* Re-enable hypervisor interrupts. */
			enable_percpu_irq(priv->intr_id);

			/* HACK: Avoid the "rotting packet" problem. */
			if (qup->__packet_receive_read !=
			    qsp->__packet_receive_queue.__packet_write)
				napi_schedule(&info->napi);

			/* ISSUE: Handle completions? */
			break;
		}

		/* Only packets that were actually processed use up budget. */
		if (tile_net_poll_aux(info, index) && ++work >= budget)
			break;
	}

	tile_net_provide_needed_buffers(info);

	return work;
}
/*
 * Ingress interrupt handler for the given device on the current cpu.
 *
 * Masks the per-cpu hypervisor interrupt and hands the work over to
 * NAPI; "dev_ptr" is the "struct net_device".
 */
static irqreturn_t tile_net_handle_ingress_interrupt(int irq, void *dev_ptr)
{
	struct net_device *dev = dev_ptr;
	struct tile_net_priv *priv = netdev_priv(dev);
	struct tile_net_cpu *info = priv->cpu[smp_processor_id()];

	/* Disable hypervisor interrupt. */
	disable_percpu_irq(priv->intr_id);

	napi_schedule(&info->napi);

	return IRQ_HANDLED;
}
/*
 * One time initialization per interface.
 *
 * Reads where the EPP shared memory should be homed, re-homes the
 * "epp_queue" page there, registers that page with the hypervisor as
 * the EPP shared memory queue, and then starts LIPP/LEPP.
 *
 * Returns 0 on success, or -EIO if any hypervisor call fails.
 */
static int tile_net_open_aux(struct net_device *dev)
{
	struct tile_net_priv *priv = netdev_priv(dev);

	int ret;
	/*
	 * Placeholder payload for the "start shim" write.  It is zeroed,
	 * as in "tile_net_unregister()" and "tile_net_stop_aux()", so that
	 * uninitialized stack contents are never handed to the hypervisor.
	 */
	int dummy = 0;
	unsigned int epp_lotar;

	/*
	 * Find out where EPP memory should be homed.
	 */
	ret = hv_dev_pread(priv->hv_devhdl, 0,
			   (HV_VirtAddr)&epp_lotar, sizeof(epp_lotar),
			   NETIO_EPP_SHM_OFF);
	if (ret < 0) {
		pr_err("could not read epp_shm_queue lotar.\n");
		return -EIO;
	}

	/*
	 * Home the page on the EPP.
	 */
	{
		int epp_home = hv_lotar_to_cpu(epp_lotar);
		struct page *page = virt_to_page(priv->epp_queue);
		homecache_change_page_home(page, 0, epp_home);
	}

	/*
	 * Register the EPP shared memory queue.
	 */
	{
		netio_ipp_address_t ea = {
			.va = 0,
			.pa = __pa(priv->epp_queue),
			.pte = hv_pte(0),
			.size = PAGE_SIZE,
		};
		ea.pte = hv_pte_set_lotar(ea.pte, epp_lotar);
		ea.pte = hv_pte_set_mode(ea.pte, HV_PTE_MODE_CACHE_TILE_L3);
		ret = hv_dev_pwrite(priv->hv_devhdl, 0,
				    (HV_VirtAddr)&ea,
				    sizeof(ea),
				    NETIO_EPP_SHM_OFF);
		if (ret < 0)
			return -EIO;
	}

	/*
	 * Start LIPP/LEPP.
	 */
	if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy,
			  sizeof(dummy), NETIO_IPP_START_SHIM_OFF) < 0) {
		pr_warning("Failed to start LIPP/LEPP.\n");
		return -EIO;
	}

	return 0;
}
/*
 * Register with hypervisor on each CPU.
 *
 * Strangely, this function does important things even if it "fails",
 * which is especially common if the link is not up yet. Hopefully
 * these things are all "harmless" if done twice!
 *
 * "dev_ptr" is the "struct net_device".  There is no return value:
 * success is reported by setting "info->registered", and a failed
 * registration write records in "info->link_down" whether the
 * hypervisor reported NETIO_LINK_DOWN.
 */
static void tile_net_register(void *dev_ptr)
{
struct net_device *dev = (struct net_device *)dev_ptr;
struct tile_net_priv *priv = netdev_priv(dev);
int my_cpu = smp_processor_id();
struct tile_net_cpu *info;
struct tile_netio_queue *queue;
/* Only network cpus can receive packets. */
/* Cpus outside "network_cpus_map" register with queue id 255. */
int queue_id =
cpumask_test_cpu(my_cpu, &priv->network_cpus_map) ? 0 : 255;
netio_input_config_t config = {
.flags = 0,
.num_receive_packets = priv->network_cpus_credits,
.queue_id = queue_id
};
int ret = 0;
netio_queue_impl_t *queuep;
PDEBUG("tile_net_register(queue_id %d)\n", queue_id);
/* Pick this cpu's per-cpu state, keyed by the device name. */
if (!strcmp(dev->name, "xgbe0"))
info = &__get_cpu_var(hv_xgbe0);
else if (!strcmp(dev->name, "xgbe1"))
info = &__get_cpu_var(hv_xgbe1);
else if (!strcmp(dev->name, "gbe0"))
info = &__get_cpu_var(hv_gbe0);
else if (!strcmp(dev->name, "gbe1"))
info = &__get_cpu_var(hv_gbe1);
else
BUG();
/* Initialize the egress timer. */
init_timer(&info->egress_timer);
info->egress_timer.data = (long)info;
info->egress_timer.function = tile_net_handle_egress_timer;
/* This happens even if the registration below fails. */
priv->cpu[my_cpu] = info;
/*
 * Register ourselves with the IPP.
 */
ret = hv_dev_pwrite(priv->hv_devhdl, 0,
(HV_VirtAddr)&config,
sizeof(netio_input_config_t),
NETIO_IPP_INPUT_REGISTER_OFF);
PDEBUG("hv_dev_pwrite(NETIO_IPP_INPUT_REGISTER_OFF) returned %d\n",
ret);
if (ret < 0) {
printk(KERN_DEBUG "hv_dev_pwrite NETIO_IPP_INPUT_REGISTER_OFF"
" failure %d\n", ret);
/* Remember whether the failure was just "link down". */
info->link_down = (ret == NETIO_LINK_DOWN);
return;
}
/*
 * Get the pointer to our queue's system part.
 */
ret = hv_dev_pread(priv->hv_devhdl, 0,
(HV_VirtAddr)&queuep,
sizeof(netio_queue_impl_t *),
NETIO_IPP_INPUT_REGISTER_OFF);
PDEBUG("hv_dev_pread(NETIO_IPP_INPUT_REGISTER_OFF) returned %d\n",
ret);
PDEBUG("queuep %p\n", queuep);
if (ret <= 0) {
/* ISSUE: Shouldn't this be a fatal error? */
pr_err("hv_dev_pread NETIO_IPP_INPUT_REGISTER_OFF failure\n");
return;
}
queue = &info->queue;
queue->__system_part = queuep;
memset(&queue->__user_part, 0, sizeof(netio_queue_user_impl_t));
/* This is traditionally "config.num_receive_packets / 2". */
queue->__user_part.__receive_credit_interval = 4;
queue->__user_part.__receive_credit_remaining =
queue->__user_part.__receive_credit_interval;
/*
 * Get a fastio index from the hypervisor.
 * ISSUE: Shouldn't this check the result?
 */
ret = hv_dev_pread(priv->hv_devhdl, 0,
(HV_VirtAddr)&queue->__user_part.__fastio_index,
sizeof(queue->__user_part.__fastio_index),
NETIO_IPP_GET_FASTIO_OFF);
PDEBUG("hv_dev_pread(NETIO_IPP_GET_FASTIO_OFF) returned %d\n", ret);
/* Attach tile_net_poll() as this cpu's NAPI poll routine, weight 64. */
netif_napi_add(dev, &info->napi, tile_net_poll, 64);
/* Now we are registered. */
info->registered = true;
}
/*
 * Undo "tile_net_register()" for the given device on the current cpu.
 *
 * "dev_ptr" is the "struct net_device".  This is a no-op if this cpu
 * never registered, or has already unregistered.
 */
static void tile_net_unregister(void *dev_ptr)
{
	struct net_device *dev = dev_ptr;
	struct tile_net_priv *priv = netdev_priv(dev);
	struct tile_net_cpu *info = priv->cpu[smp_processor_id()];
	int dummy = 0;
	int ret;

	/* Nothing to do if never registered, or already unregistered. */
	if (info == NULL || !info->registered)
		return;

	/* Tell LIPP that we are going away. */
	ret = hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy,
			    sizeof(dummy), NETIO_IPP_INPUT_UNREGISTER_OFF);
	PDEBUG("hv_dev_pwrite(NETIO_IPP_INPUT_UNREGISTER_OFF) returned %d\n",
	       ret);
	if (ret < 0) {
		/* FIXME: Just panic? */
		pr_err("hv_dev_pwrite NETIO_IPP_INPUT_UNREGISTER_OFF"
		       " failure %d\n", ret);
	}

	/*
	 * Throw away whatever is still sitting in our NetIO queue.
	 * Hopefully, once the unregister call has completed, no more
	 * packets are in flight on the IDN.
	 */
	tile_net_discard_packets(dev);

	/* We no longer owe LIPP any buffers. */
	info->num_needed_small_buffers = 0;
	info->num_needed_large_buffers = 0;

	/* Stop the egress timer. */
	del_timer(&info->egress_timer);
	info->egress_timer_scheduled = false;

	netif_napi_del(&info->napi);

	/* Now we are unregistered. */
	info->registered = false;
}
/*
 * Helper function for "tile_net_stop()".
 *
 * Unregisters every cpu, stops LIPP/LEPP, and marks the device as no
 * longer "partly opened".  Panics if LIPP/LEPP cannot be stopped.
 *
 * This is also used by "tile_net_open_inner()" to clean up after a
 * failed registration, when "fully_opened" is known to be false and
 * the other steps in "tile_net_stop()" are not needed.  ISSUE: It
 * might be simpler to just call "tile_net_stop()" in that case too.
 */
static void tile_net_stop_aux(struct net_device *dev)
{
	struct tile_net_priv *priv = netdev_priv(dev);
	int dummy = 0;
	int ret;

	/* Unregister all tiles, so LIPP will stop delivering packets. */
	on_each_cpu(tile_net_unregister, dev, 1);

	/* Stop LIPP/LEPP. */
	ret = hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy,
			    sizeof(dummy), NETIO_IPP_STOP_SHIM_OFF);
	if (ret < 0)
		panic("Failed to stop LIPP/LEPP!\n");

	priv->partly_opened = 0;
}
/*
 * Turn off ingress interrupts, and NAPI if it is enabled, for the
 * given device on the current cpu.  "dev_ptr" is the
 * "struct net_device".
 */
static void tile_net_disable_intr(void *dev_ptr)
{
	struct net_device *dev = dev_ptr;
	struct tile_net_priv *priv = netdev_priv(dev);
	struct tile_net_cpu *info = priv->cpu[smp_processor_id()];

	/* Disable hypervisor interrupt. */
	disable_percpu_irq(priv->intr_id);

	/* NAPI only needs disabling if this cpu ever enabled it. */
	if (info == NULL || !info->napi_enabled)
		return;

	napi_disable(&info->napi);
	info->napi_enabled = false;
}
/*
 * Per-cpu callback: enable the ingress interrupt for the given device
 * on the current cpu and turn on this cpu's NAPI context.
 *
 * NOTE: Unlike "tile_net_disable_intr()", the per-cpu info pointer is
 * dereferenced without a NULL check.
 */
static void tile_net_enable_intr(void *dev_ptr)
{
	struct net_device *dev = dev_ptr;
	struct tile_net_priv *priv = netdev_priv(dev);
	struct tile_net_cpu *info = priv->cpu[smp_processor_id()];

	/* Enable hypervisor interrupt. */
	enable_percpu_irq(priv->intr_id);

	/* Enable NAPI and remember that we did. */
	napi_enable(&info->napi);
	info->napi_enabled = true;
}
/*
 * Do most of the work of bringing up the interface.
 *
 * Called from tile_net_open(), and also from tile_net_open_retry().
 *
 * Returns 0 if the interface was brought up, a negative value if
 * tile_net_open() should return that value as an error, and a positive
 * value if tile_net_open() should return success and schedule a work
 * item to periodically retry the bringup.
 */
static int tile_net_open_inner(struct net_device *dev)
{
	struct tile_net_priv *priv = netdev_priv(dev);
	int this_cpu = smp_processor_id();
	struct tile_net_cpu *info;
	unsigned int irq;
	int cpu;

	/*
	 * Register on the local cpu first, so the semi-expected "link
	 * down" failure can be reported specially.  Note that this path
	 * does NOT call "tile_net_stop_aux()", unlike the one below.
	 */
	tile_net_register(dev);
	info = priv->cpu[this_cpu];
	if (!info->registered)
		return info->link_down ? 1 : -EAGAIN;

	/*
	 * Register on all the other cpus.  Any failure, even for "link
	 * down" (which might not be possible), is cleaned up with
	 * "tile_net_stop_aux()".
	 */
	smp_call_function(tile_net_register, (void *)dev, 1);
	for_each_online_cpu(cpu) {
		if (priv->cpu[cpu]->registered)
			continue;
		tile_net_stop_aux(dev);
		return -EAGAIN;
	}

	/*
	 * Set the device intr bit mask, taken from the local queue.
	 * The tile_net_register above sets per tile __intr_id.
	 */
	priv->intr_id = info->queue.__system_part->__intr_id;
	BUG_ON(!priv->intr_id);

	/*
	 * Register the device interrupt handler.  The mask should have
	 * exactly one bit set, so __ffs() yields the index into the
	 * interrupt handler table.
	 */
	irq = __ffs(priv->intr_id);
	tile_irq_activate(irq, TILE_IRQ_PERCPU);
	BUG_ON(request_irq(irq, tile_net_handle_ingress_interrupt,
			   0, dev->name, (void *)dev) != 0);

	/* ISSUE: How could "priv->fully_opened" ever be "true" here? */
	if (!priv->fully_opened) {
		int limit =
			priv->network_cpus_count * priv->network_cpus_credits;
		int zero = 0;

		/* Allocate initial buffers; falling short is fatal. */
		info->num_needed_small_buffers =
			min(LIPP_SMALL_BUFFERS, limit);
		info->num_needed_large_buffers =
			min(LIPP_LARGE_BUFFERS, limit);
		tile_net_provide_needed_buffers(info);

		if (info->num_needed_small_buffers != 0 ||
		    info->num_needed_large_buffers != 0)
			panic("Insufficient memory for buffer stack!");

		/* Start LIPP/LEPP and activate "ingress" at the shim. */
		if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&zero,
				  sizeof(zero), NETIO_IPP_INPUT_INIT_OFF) < 0)
			panic("Failed to activate the LIPP Shim!\n");

		priv->fully_opened = 1;
	}

	/* On each tile, enable the hypervisor to trigger interrupts. */
	/* ISSUE: Do this before starting LIPP/LEPP? */
	on_each_cpu(tile_net_enable_intr, (void *)dev, 1);

	/* Start our transmit queue. */
	netif_start_queue(dev);

	return 0;
}
/*
 * Delayed-work handler that retries bringing up the NetIO interface
 * when it did not come up cleanly during tile_net_open().
 *
 * On success, tell Linux we now have a working link; on any non-zero
 * result, reschedule ourselves to try again later.
 * ISSUE: What if the return value is negative?
 */
static void tile_net_open_retry(struct work_struct *w)
{
	struct delayed_work *dwork =
		container_of(w, struct delayed_work, work);
	struct tile_net_priv *priv =
		container_of(dwork, struct tile_net_priv, retry_work);

	if (tile_net_open_inner(priv->dev) == 0) {
		netif_carrier_on(priv->dev);
		return;
	}

	schedule_delayed_work_on(singlethread_cpu, &priv->retry_work,
				 TILE_NET_RETRY_INTERVAL);
}
/*
 * Called when a network interface is made active.
 *
 * Returns 0 on success, negative value on failure.
 *
 * The open entry point is called when a network interface is made
 * active by the system (IFF_UP). At this point all resources needed
 * for transmit and receive operations are allocated, the interrupt
 * handler is registered with the OS, the watchdog timer is started,
 * and the stack is notified that the interface is ready.
 *
 * If the actual link is not available yet, then we tell Linux that
 * we have no carrier, and we keep checking until the link comes up.
 */
static int tile_net_open(struct net_device *dev)
{
	struct tile_net_priv *priv = netdev_priv(dev);
	int ret = 0;

	/*
	 * "priv->partly_opened" tells us whether the IPP has already
	 * been initialized; if it is set, it must not be initialized
	 * again.
	 */
	if (!priv->partly_opened) {
		int ncpus;
		int per_cpu;

		/* Initialize LIPP/LEPP, and start the Shim. */
		ret = tile_net_open_aux(dev);
		if (ret < 0) {
			pr_err("tile_net_open_aux failed: %d\n", ret);
			return ret;
		}

		/* Analyze the network cpus. */
		if (!network_cpus_used)
			cpumask_copy(&priv->network_cpus_map, cpu_online_mask);
		else
			cpumask_copy(&priv->network_cpus_map,
				     &network_cpus_map);
		ncpus = cpumask_weight(&priv->network_cpus_map);

		/* Limit credits to available buffers, and apply min. */
		per_cpu = max(16, (LIPP_LARGE_BUFFERS / ncpus) & ~1);

		/* Apply "GBE" max limit. */
		/* ISSUE: Use higher limit for XGBE? */
		per_cpu = min(NETIO_MAX_RECEIVE_PKTS, per_cpu);

		priv->network_cpus_count = ncpus;
		priv->network_cpus_credits = per_cpu;

#ifdef TILE_NET_DEBUG
		pr_info("Using %d network cpus, with %d credits each\n",
		       priv->network_cpus_count, priv->network_cpus_credits);
#endif

		priv->partly_opened = 1;
	}

	/* Attempt to bring up the link. */
	ret = tile_net_open_inner(dev);
	if (ret < 0)
		return ret;
	if (ret == 0) {
		netif_carrier_on(dev);
		return 0;
	}

	/*
	 * We were unable to bring up the NetIO interface, but we want to
	 * try again in a little bit.  Tell Linux that we have no carrier
	 * so it doesn't try to use the interface before the link comes up
	 * and then remember to try again later.
	 */
	netif_carrier_off(dev);
	schedule_delayed_work_on(singlethread_cpu, &priv->retry_work,
				 TILE_NET_RETRY_INTERVAL);

	return 0;
}
/*
 * Disables a network interface.
 *
 * Returns 0, this is not allowed to fail.
 *
 * The close entry point is called when an interface is de-activated
 * by the OS.  The hardware is still under the driver's control, but
 * needs to be disabled.  A global MAC reset is issued to stop the
 * hardware, and all transmit and receive resources are freed.
 *
 * ISSUE: Can this be called while "tile_net_poll()" is running?
 */
static int tile_net_stop(struct net_device *dev)
{
	struct tile_net_priv *priv = netdev_priv(dev);
	bool pending = true;

	PDEBUG("tile_net_stop()\n");

	/* ISSUE: Only needed if not yet fully open. */
	cancel_delayed_work_sync(&priv->retry_work);

	/* Can't transmit any more. */
	netif_stop_queue(dev);

	/* Disable hypervisor interrupts on each tile. */
	on_each_cpu(tile_net_disable_intr, (void *)dev, 1);

	/*
	 * Unregister the interrupt handler.
	 * The __ffs() function returns the index into the interrupt handler
	 * table from the interrupt bit mask which should have one bit
	 * and one bit only set.
	 */
	if (priv->intr_id)
		free_irq(__ffs(priv->intr_id), dev);

	/* Drain all the LIPP buffers. */
	while (true) {
		/*
		 * Unsigned, so that the shift below cannot sign-extend
		 * when the value is widened to "phys_addr_t".
		 */
		unsigned int buffer;

		/* NOTE: This should never fail. */
		if (hv_dev_pread(priv->hv_devhdl, 0, (HV_VirtAddr)&buffer,
				 sizeof(buffer), NETIO_IPP_DRAIN_OFF) < 0)
			break;

		/* Stop when done. */
		if (buffer == 0)
			break;

		{
			/* Convert "linux_buffer_t" to "va". */
			void *va = __va((phys_addr_t)(buffer >> 1) << 7);

			/* Acquire the associated "skb". */
			struct sk_buff **skb_ptr = va - sizeof(*skb_ptr);
			struct sk_buff *skb = *skb_ptr;

			kfree_skb(skb);
		}
	}

	/* Stop LIPP/LEPP. */
	tile_net_stop_aux(dev);

	priv->fully_opened = 0;

	/*
	 * The IRQ was released above, so forget it.  Otherwise, if the
	 * next open stops at "link down" (before request_irq() runs),
	 * the following stop would call free_irq() a second time for an
	 * IRQ that is no longer requested.  "tile_net_open_inner()"
	 * assigns "intr_id" again before it is next used.
	 */
	priv->intr_id = 0;

	/*
	 * XXX: ISSUE: It appears that, in practice anyway, by the
	 * time we get here, there are no pending completions.
	 */
	while (pending) {
		struct sk_buff *olds[32];
		unsigned int wanted = 32;
		unsigned int i, nolds = 0;

		nolds = tile_net_lepp_grab_comps(dev, olds,
						 wanted, &pending);

		/* ISSUE: We have never actually seen this debug spew. */
		if (nolds != 0)
			pr_info("During tile_net_stop(), grabbed %d comps.\n",
			       nolds);

		for (i = 0; i < nolds; i++)
			kfree_skb(olds[i]);
	}

	/* Wipe the EPP queue. */
	memset(priv->epp_queue, 0, sizeof(lepp_queue_t));

	/* Evict the EPP queue. */
	finv_buffer(priv->epp_queue, PAGE_SIZE);

	return 0;
}
/*
 * Fill in the "frags" array for a LEPP command describing "skb".
 *
 * The first entry (if "b_len" is non-zero) covers the "b_len" bytes at
 * "b_data"; one further entry is added per page fragment of "skb".
 * When "hash_default" is not set, the memory behind each entry is
 * flushed with "finv_buffer_remote()" first.
 *
 * Returns the number of entries written to "frags".
 */
static unsigned int tile_net_tx_frags(lepp_frag_t *frags,
				      struct sk_buff *skb,
				      void *b_data, unsigned int b_len)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	unsigned int count = 0;
	unsigned int idx;
	phys_addr_t pa;

	/* Linear part, if any. */
	if (b_len != 0) {
		lepp_frag_t *out = &frags[count];

		if (!hash_default)
			finv_buffer_remote(b_data, b_len);

		pa = __pa(b_data);
		out->cpa_lo = pa;
		out->cpa_hi = pa >> 32;
		out->length = b_len;
		out->hash_for_home = hash_default;
		count++;
	}

	/* Page fragments. */
	for (idx = 0; idx < shinfo->nr_frags; idx++) {
		skb_frag_t *frag = &shinfo->frags[idx];
		lepp_frag_t *out = &frags[count];
		unsigned long pfn = page_to_pfn(frag->page);

		/* FIXME: Compute "hash_for_home" properly. */
		/* ISSUE: The hypervisor checks CHIP_HAS_REV1_DMA_PACKETS(). */
		int hash_for_home = hash_default;

		/* FIXME: Hmmm. */
		if (!hash_default) {
			void *va = pfn_to_kaddr(pfn) + frag->page_offset;

			BUG_ON(PageHighMem(frag->page));
			finv_buffer_remote(va, frag->size);
		}

		pa = ((phys_addr_t)pfn << PAGE_SHIFT) + frag->page_offset;
		out->cpa_lo = pa;
		out->cpa_hi = pa >> 32;
		out->length = frag->size;
		out->hash_for_home = hash_for_home;
		count++;
	}

	return count;
}
/*
 * This function takes "skb", consisting of a header template and a
 * payload, and hands it to LEPP, to emit as one or more segments,
 * each consisting of a possibly modified header, plus a piece of the
 * payload, via a process known as "tcp segmentation offload".
 *
 * Usually, "data" will contain the header template, of size "sh_len",
 * and "sh->frags" will contain "skb->data_len" bytes of payload, and
 * there will be "sh->gso_segs" segments.
 *
 * Sometimes, if "sendfile()" requires copying, we will be called with
 * "data" containing the header and payload, with "frags" being empty.
 *
 * In theory, "sh->nr_frags" could be 3, but in practice, it seems
 * that this will never actually happen.
 *
 * See "emulate_large_send_offload()" for some reference code, which
 * does not handle checksumming.
 *
 * The command (command header, frag list, then a copy of the packet
 * headers) is first built in a stack buffer, then copied into the
 * shared EPP command queue while holding "priv->cmd_lock", and "skb"
 * is stored in "eq->comps[]" for eventual freeing.
 *
 * Always returns NETDEV_TX_OK.
 *
 * ISSUE: How do we make sure that high memory DMA does not migrate?
 */
static int tile_net_tx_tso(struct sk_buff *skb, struct net_device *dev)
{
struct tile_net_priv *priv = netdev_priv(dev);
int my_cpu = smp_processor_id();
struct tile_net_cpu *info = priv->cpu[my_cpu];
struct tile_net_stats_t *stats = &info->stats;
struct skb_shared_info *sh = skb_shinfo(skb);
unsigned char *data = skb->data;
/* The ip header follows the ethernet header. */
struct iphdr *ih = ip_hdr(skb);
unsigned int ih_len = ih->ihl * 4;
/* Note that "nh == ih", by definition. */
unsigned char *nh = skb_network_header(skb);
unsigned int eh_len = nh - data;
/* The tcp header follows the ip header. */
struct tcphdr *th = (struct tcphdr *)(nh + ih_len);
unsigned int th_len = th->doff * 4;
/* The total number of header bytes. */
/* NOTE: This may be less than skb_headlen(skb). */
unsigned int sh_len = eh_len + ih_len + th_len;
/* The number of payload bytes at "skb->data + sh_len". */
/* This is non-zero for sendfile() without HIGHDMA. */
unsigned int b_len = skb_headlen(skb) - sh_len;
/* The total number of payload bytes. */
unsigned int d_len = b_len + skb->data_len;
/* The maximum payload size. */
unsigned int p_len = sh->gso_size;
/* The total number of segments. */
unsigned int num_segs = sh->gso_segs;
/*
 * The temporary copy of the command: enough u32 words to hold
 * LEPP_MAX_CMD_SIZE bytes, rounded up.
 */
u32 cmd_body[(LEPP_MAX_CMD_SIZE + 3) / 4];
lepp_tso_cmd_t *cmd = (lepp_tso_cmd_t *)cmd_body;
/*
 * Analyze the "frags".  NOTE: this call runs as part of the
 * initializer, i.e. before any of the BUG_ON() checks below, so the
 * frag entries have already been written into "cmd_body" by the time
 * "num_frags" is checked against LEPP_MAX_FRAGS.
 */
unsigned int num_frags =
tile_net_tx_frags(cmd->frags, skb, data + sh_len, b_len);
/* The size of the command, including frags and header. */
size_t cmd_size = LEPP_TSO_CMD_SIZE(num_frags, sh_len);
/* The command header. */
lepp_tso_cmd_t cmd_init = {
.tso = true,
.header_size = sh_len,
.ip_offset = eh_len,
.tcp_offset = eh_len + ih_len,
.payload_size = p_len,
.num_frags = num_frags,
};
unsigned long irqflags;
lepp_queue_t *eq = priv->epp_queue;
/* Completed skbs collected under the lock and freed after unlocking. */
struct sk_buff *olds[4];
unsigned int wanted = 4;
unsigned int i, nolds = 0;
unsigned int cmd_head, cmd_tail, cmd_next;
unsigned int comp_tail;
unsigned int free_slots;
/* Paranoia. */
BUG_ON(skb->protocol != htons(ETH_P_IP));
BUG_ON(ih->protocol != IPPROTO_TCP);
BUG_ON(skb->ip_summed != CHECKSUM_PARTIAL);
BUG_ON(num_frags > LEPP_MAX_FRAGS);
/*--BUG_ON(num_segs != (d_len + (p_len - 1)) / p_len); */
BUG_ON(num_segs <= 1);
/* Finish preparing the command. */
/*
 * Copy the command header.
 * NOTE(review): "cmd->frags" was filled in above; presumably "frags"
 * is a trailing array that lies outside sizeof(*cmd), so this struct
 * assignment does not overwrite it -- confirm against the definition
 * of lepp_tso_cmd_t.
 */
*cmd = cmd_init;
/* Copy the "header" bytes so that they directly follow the last frag. */
memcpy(&cmd->frags[num_frags], data, sh_len);
/* Prefetch and wait, to minimize time spent holding the spinlock. */
prefetch_L1(&eq->comp_tail);
prefetch_L1(&eq->cmd_tail);
mb();
/* Enqueue the command. */
spin_lock_irqsave(&priv->cmd_lock, irqflags);
/*
 * Handle completions if needed to make room.
 * HACK: Spin until there is sufficient room.
 *
 * The "spin" label sits inside the "if" body; the "goto spin"
 * statements further down jump back into it.  All of this runs with
 * "cmd_lock" held.
 * NOTE(review): once "nolds" reaches "wanted", the grab call below is
 * asked for zero completions -- confirm the loop can still make
 * progress in that case.
 */
free_slots = lepp_num_free_comp_slots(eq);
if (free_slots < 1) {
spin:
nolds += tile_net_lepp_grab_comps(dev, olds + nolds,
wanted - nolds, NULL);
if (lepp_num_free_comp_slots(eq) < 1)
goto spin;
}
/* (Re)read the queue indices; this is also reached again after a spin. */
cmd_head = eq->cmd_head;
cmd_tail = eq->cmd_tail;
/* NOTE: The "gotos" below are untested. */
/* Prepare to advance, detecting full queue. */
cmd_next = cmd_tail + cmd_size;
if (cmd_tail < cmd_head && cmd_next >= cmd_head)
goto spin;
/*
 * Past the limit: the next command will start at offset 0, while this
 * command is still written at the current "cmd_tail".
 */
if (cmd_next > LEPP_CMD_LIMIT) {
cmd_next = 0;
if (cmd_next == cmd_head)
goto spin;
}
/* Copy the command. */
memcpy(&eq->cmds[cmd_tail], cmd, cmd_size);
/* Advance. */
cmd_tail = cmd_next;
/* Record "skb" for eventual freeing. */
comp_tail = eq->comp_tail;
eq->comps[comp_tail] = skb;
LEPP_QINC(comp_tail);
eq->comp_tail = comp_tail;
/* Flush before allowing LEPP to handle the command. */
__insn_mf();
/* Publishing the new tail is the last store made under the lock. */
eq->cmd_tail = cmd_tail;
spin_unlock_irqrestore(&priv->cmd_lock, irqflags);
/* If nothing was collected while spinning, try once now, unlocked. */
if (nolds == 0)
nolds = tile_net_lepp_grab_comps(dev, olds, wanted, NULL);
/* Handle completions. */
for (i = 0; i < nolds; i++)
kfree_skb(olds[i]);
/*
 * Update stats: one packet per segment; bytes are the headers once
 * per segment plus the total payload.
 */
stats->tx_packets += num_segs;
stats->tx_bytes += (num_segs * sh_len) + d_len;
/* Make sure the egress timer is scheduled. */
tile_net_schedule_egress_timer(info);
return NETDEV_TX_OK;
}
/*
 * Transmit a packet (called by the kernel via "hard_start_xmit" hook).
 *
 * Returns NETDEV_TX_BUSY if the current cpu is not registered.
 * Packets with a non-zero "gso_size" are handed to "tile_net_tx_tso()".
 * Otherwise one LEPP command is built per fragment, the commands are
 * copied into the shared EPP command queue while holding
 * "priv->cmd_lock", "skb" is stored in "eq->comps[]" for eventual
 * freeing, and NETDEV_TX_OK is returned.
 */
static int tile_net_tx(struct sk_buff *skb, struct net_device *dev)
{
struct tile_net_priv *priv = netdev_priv(dev);
int my_cpu = smp_processor_id();
struct tile_net_cpu *info = priv->cpu[my_cpu];
struct tile_net_stats_t *stats = &info->stats;
unsigned long irqflags;
struct skb_shared_info *sh = skb_shinfo(skb);
unsigned int len = skb->len;
unsigned char *data = skb->data;
/* Computed unconditionally; only used for CHECKSUM_PARTIAL below. */
unsigned int csum_start = skb->csum_start - skb_headroom(skb);
/*
 * NOTE(review): "tile_net_tx_frags()" is not told the capacity of this
 * array; it writes one entry for the linear data plus one per page
 * fragment -- confirm that this can never exceed LEPP_MAX_FRAGS.
 */
lepp_frag_t frags[LEPP_MAX_FRAGS];
unsigned int num_frags;
lepp_queue_t *eq = priv->epp_queue;
/* Completed skbs collected under the lock and freed after unlocking. */
struct sk_buff *olds[4];
unsigned int wanted = 4;
unsigned int i, nolds = 0;
unsigned int cmd_size = sizeof(lepp_cmd_t);
unsigned int cmd_head, cmd_tail, cmd_next;
unsigned int comp_tail;
lepp_cmd_t cmds[LEPP_MAX_FRAGS];
unsigned int free_slots;
/*
 * This is paranoia, since we think that if the link doesn't come
 * up, telling Linux we have no carrier will keep it from trying
 * to transmit. If it does, though, we can't execute this routine,
 * since data structures we depend on aren't set up yet.
 */
if (!info->registered)
return NETDEV_TX_BUSY;
/* Save the timestamp. */
dev->trans_start = jiffies;
#ifdef TILE_NET_PARANOIA
#if CHIP_HAS_CBOX_HOME_MAP()
if (hash_default) {
HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)data);
if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3)
panic("Non-coherent egress buffer!");
}
#endif
#endif
#ifdef TILE_NET_DUMP_PACKETS
/* ISSUE: Does not dump the "frags". */
dump_packet(data, skb_headlen(skb), "tx");
#endif /* TILE_NET_DUMP_PACKETS */
/* Segmentation offload requests take the TSO path instead. */
if (sh->gso_size != 0)
return tile_net_tx_tso(skb, dev);
/* Prepare the commands. */
num_frags = tile_net_tx_frags(frags, skb, data, skb_headlen(skb));
for (i = 0; i < num_frags; i++) {
/* Only the last command ends the packet and asks for a completion. */
bool final = (i == num_frags - 1);
lepp_cmd_t cmd = {
.cpa_lo = frags[i].cpa_lo,
.cpa_hi = frags[i].cpa_hi,
.length = frags[i].length,
.hash_for_home = frags[i].hash_for_home,
.send_completion = final,
.end_of_packet = final
};
/* The checksum request is attached to the first command only. */
if (i == 0 && skb->ip_summed == CHECKSUM_PARTIAL) {
cmd.compute_checksum = 1;
cmd.checksum_data.bits.start_byte = csum_start;
cmd.checksum_data.bits.count = len - csum_start;
cmd.checksum_data.bits.destination_byte =
csum_start + skb->csum_offset;
}
cmds[i] = cmd;
}
/* Prefetch and wait, to minimize time spent holding the spinlock. */
prefetch_L1(&eq->comp_tail);
prefetch_L1(&eq->cmd_tail);
mb();
/* Enqueue the commands. */
spin_lock_irqsave(&priv->cmd_lock, irqflags);
/*
 * Handle completions if needed to make room.
 * HACK: Spin until there is sufficient room.
 *
 * The "spin" label sits inside the "if" body; the "goto spin"
 * statements in the copy loop jump back into it.  All of this runs
 * with "cmd_lock" held.
 * NOTE(review): once "nolds" reaches "wanted", the grab call below is
 * asked for zero completions -- confirm the loop can still make
 * progress in that case.
 */
free_slots = lepp_num_free_comp_slots(eq);
if (free_slots < 1) {
spin:
nolds += tile_net_lepp_grab_comps(dev, olds + nolds,
wanted - nolds, NULL);
if (lepp_num_free_comp_slots(eq) < 1)
goto spin;
}
/* (Re)read the queue indices; this is also reached again after a spin. */
cmd_head = eq->cmd_head;
cmd_tail = eq->cmd_tail;
/* NOTE: The "gotos" below are untested. */
/*
 * Copy the commands, or fail.  A "goto spin" from inside this loop
 * re-enters it from "i = 0" with freshly read indices; "eq->cmd_tail"
 * is only updated after the loop, so restarting re-copies all of the
 * commands from the original tail.
 */
for (i = 0; i < num_frags; i++) {
/* Prepare to advance, detecting full queue. */
cmd_next = cmd_tail + cmd_size;
if (cmd_tail < cmd_head && cmd_next >= cmd_head)
goto spin;
/* Past the limit: the following command will start at offset 0. */
if (cmd_next > LEPP_CMD_LIMIT) {
cmd_next = 0;
if (cmd_next == cmd_head)
goto spin;
}
/* Copy the command. */
*(lepp_cmd_t *)&eq->cmds[cmd_tail] = cmds[i];
/* Advance. */
cmd_tail = cmd_next;
}
/* Record "skb" for eventual freeing. */
comp_tail = eq->comp_tail;
eq->comps[comp_tail] = skb;
LEPP_QINC(comp_tail);
eq->comp_tail = comp_tail;
/* Flush before allowing LEPP to handle the command. */
__insn_mf();
/* Publishing the new tail is the last store made under the lock. */
eq->cmd_tail = cmd_tail;
spin_unlock_irqrestore(&priv->cmd_lock, irqflags);
/* If nothing was collected while spinning, try once now, unlocked. */
if (nolds == 0)
nolds = tile_net_lepp_grab_comps(dev, olds, wanted, NULL);
/* Handle completions. */
for (i = 0; i < nolds; i++)
kfree_skb(olds[i]);
/* HACK: Track "expanded" size for short packets (e.g. 42 < 60). */
stats->tx_packets++;
stats->tx_bytes += ((len >= ETH_ZLEN) ? len : ETH_ZLEN);
/* Make sure the egress timer is scheduled. */
tile_net_schedule_egress_timer(info);
return NETDEV_TX_OK;
}
/*
 * Deal with a transmit timeout.
 *
 * Emits debug output (including the time elapsed since
 * "dev->trans_start") and wakes the transmit queue; nothing else is
 * reset or recovered here.
 */
static void tile_net_tx_timeout(struct net_device *dev)
{
PDEBUG("tile_net_tx_timeout()\n");
PDEBUG("Transmit timeout at %ld, latency %ld\n", jiffies,
jiffies - dev->trans_start);
/* XXX: ISSUE: This doesn't seem useful for us. */
netif_wake_queue(dev);
}
/*
 * Ioctl commands.
 *
 * No device-specific ioctls are implemented: every request is rejected
 * with -EOPNOTSUPP, and "dev", "rq" and "cmd" are ignored.
 */
static int tile_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
return -EOPNOTSUPP;
}
/*
 * Get System Network Statistics.
 *
 * Sums the per-cpu packet and byte counters of every online cpu that
 * has per-cpu state, stores the totals in "priv->stats", and returns
 * the address of that structure.
 */
static struct net_device_stats *tile_net_get_stats(struct net_device *dev)
{
	struct tile_net_priv *priv = netdev_priv(dev);
	u32 rx_pkts = 0, rx_octets = 0;
	u32 tx_pkts = 0, tx_octets = 0;
	int cpu;

	for_each_online_cpu(cpu) {
		/* Skip cpus that have no per-cpu state. */
		if (!priv->cpu[cpu])
			continue;

		rx_pkts += priv->cpu[cpu]->stats.rx_packets;
		rx_octets += priv->cpu[cpu]->stats.rx_bytes;
		tx_pkts += priv->cpu[cpu]->stats.tx_packets;
		tx_octets += priv->cpu[cpu]->stats.tx_bytes;
	}

	priv->stats.rx_packets = rx_pkts;
	priv->stats.rx_bytes = rx_octets;
	priv->stats.tx_packets = tx_pkts;
	priv->stats.tx_bytes = tx_octets;

	return &priv->stats;
}
/*
 * Change the "mtu".
 *
 * The "change_mtu" method is usually not needed.
 * If you need it, it must be like this.
 *
 * Accepts values from 68 to 1500 inclusive and returns 0; any other
 * value is rejected with -EINVAL and "dev->mtu" is left unchanged.
 */
static int tile_net_change_mtu(struct net_device *dev, int new_mtu)
{
	PDEBUG("tile_net_change_mtu()\n");

	if (new_mtu >= 68 && new_mtu <= 1500) {
		/* In range: accept the value. */
		dev->mtu = new_mtu;
		return 0;
	}

	return -EINVAL;
}
/*
* Change the Ethernet Address of the NIC.
*
* The hypervisor driver does not support changing MAC address. However,
* the IPP does not do anything with the MAC address, so the address which
* gets used on outgoing packets, and which is accepted on incoming packets,
* is completely up to the NetIO program or kernel driver which is actually
* handling them.
*
* Returns 0 on success, negative on failure.
*/
static int tile_net_set_mac_address(struct net_device *dev, void *p)
{
struct sockaddr *addr = p;
if (!is_valid_ether_addr(addr->sa_data))
return -EINVAL;
/* ISSUE: Note that "dev_addr" is now a pointer. */
memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
return 0;
}
/*
* Obtain the MAC address from the hypervisor.
* This must be done before opening the device.
*/
static int tile_net_get_mac(struct net_device *dev)
{
struct tile_net_priv *priv = netdev_priv(dev);
char hv_dev_name[32];
int len;
__netio_getset_offset_t offset = { .word = NETIO_IPP_PARAM_OFF };
int ret;
/* For example, "xgbe0". */
strcpy(hv_dev_name, dev->name);
len = strlen(hv_dev_name);
/* For example, "xgbe/0". */
hv_dev_name[len] = hv_dev_name[len - 1];
hv_dev_name[len - 1] = '/';
len++;
/* For example, "xgbe/0/native_hash". */
strcpy(hv_dev_name + len, hash_default ? "/native_hash" : "/native");
/* Get the hypervisor handle for this device. */
priv->hv_devhdl = hv_dev_open((HV_VirtAddr)hv_dev_name, 0);
PDEBUG("hv_dev_open(%s) returned %d %p\n",
hv_dev_name, priv->hv_devhdl, &priv->hv_devhdl);
if (priv->hv_devhdl < 0) {
if (priv->hv_devhdl == HV_ENODEV)
printk(KERN_DEBUG "Ignoring unconfigured device %s\n",
hv_dev_name);
else
printk(KERN_DEBUG "hv_dev_open(%s) returned %d\n",
hv_dev_name, priv->hv_devhdl);
return -1;
}
/*
* Read the hardware address from the hypervisor.
* ISSUE: Note that "dev_addr" is now a pointer.
*/
offset.bits.class = NETIO_PARAM;
offset.bits.addr = NETIO_PARAM_MAC;
ret = hv_dev_pread(priv->hv_devhdl, 0,
(HV_VirtAddr)dev->dev_addr, dev->addr_len,
offset.word);
PDEBUG("hv_dev_pread(NETIO_PARAM_MAC) returned %d\n", ret);
if (ret <= 0) {
printk(KERN_DEBUG "hv_dev_pread(NETIO_PARAM_MAC) %s failed\n",
dev->name);
/*
* Since the device is configured by the hypervisor but we
* can't get its MAC address, we are most likely running
* the simulator, so let's generate a random MAC address.
*/
random_ether_addr(dev->dev_addr);
}
return 0;
}
static struct net_device_ops tile_net_ops = {
.ndo_open = tile_net_open,
.ndo_stop = tile_net_stop,
.ndo_start_xmit = tile_net_tx,
.ndo_do_ioctl = tile_net_ioctl,
.ndo_get_stats = tile_net_get_stats,
.ndo_change_mtu = tile_net_change_mtu,
.ndo_tx_timeout = tile_net_tx_timeout,
.ndo_set_mac_address = tile_net_set_mac_address
};
/*
 * The setup function.
 *
 * This uses ether_setup() to assign various fields in dev, including
 * setting IFF_BROADCAST and IFF_MULTICAST, then installs the driver's
 * operations table and sets the feature flags, queue length and mtu.
 */
static void tile_net_setup(struct net_device *dev)
{
	PDEBUG("tile_net_setup()\n");

	ether_setup(dev);

	dev->netdev_ops = &tile_net_ops;
	dev->watchdog_timeo = TILE_NET_TIMEOUT;
	dev->tx_queue_len = TILE_NET_TX_QUEUE_LEN;
	dev->mtu = TILE_NET_MTU;

	/*
	 * We want lockless xmit, and we support hardware tx checksums,
	 * scatter/gather and TSO.
	 */
	dev->features |= NETIF_F_LLTX | NETIF_F_HW_CSUM |
			 NETIF_F_SG | NETIF_F_TSO;

#ifdef TILE_NET_GSO
	/* We support GSO. */
	dev->features |= NETIF_F_GSO;
#endif

	if (hash_default)
		dev->features |= NETIF_F_HIGHDMA;

	/* ISSUE: We should support NETIF_F_UFO. */
}
/*
 * Allocate the device structure, register the device, and obtain the
 * MAC address from the hypervisor.
 *
 * Returns the new device, or NULL if any step fails (in which case
 * everything set up so far is released again).
 */
static struct net_device *tile_net_dev_init(const char *name)
{
	struct net_device *dev;
	struct tile_net_priv *priv;
	struct page *page;
	int ret;

	/*
	 * Allocate the device structure.  This allocates "priv", calls
	 * tile_net_setup(), and saves "name".  Normally, "name" is a
	 * template, instantiated by register_netdev(), but not for us.
	 */
	dev = alloc_netdev(sizeof(*priv), name, tile_net_setup);
	if (!dev) {
		pr_err("alloc_netdev(%s) failed\n", name);
		return NULL;
	}

	/* Initialize "priv". */
	priv = netdev_priv(dev);
	memset(priv, 0, sizeof(*priv));

	/* Save "dev" for "tile_net_open_retry()". */
	priv->dev = dev;

	INIT_DELAYED_WORK(&priv->retry_work, tile_net_open_retry);

	spin_lock_init(&priv->cmd_lock);
	spin_lock_init(&priv->comp_lock);

	/* Allocate "epp_queue", which must fit in a single page. */
	BUG_ON(get_order(sizeof(lepp_queue_t)) != 0);
	page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0);
	if (!page)
		goto err_free_dev;
	priv->epp_queue = page_address(page);

	/* Register the network device. */
	ret = register_netdev(dev);
	if (ret) {
		pr_err("register_netdev %s failed %d\n", dev->name, ret);
		goto err_free_page;
	}

	/* Get the MAC address. */
	ret = tile_net_get_mac(dev);
	if (ret < 0)
		goto err_unregister;

	return dev;

err_unregister:
	unregister_netdev(dev);
err_free_page:
	free_page((unsigned long)priv->epp_queue);
err_free_dev:
	free_netdev(dev);
	return NULL;
}
/*
 * Module cleanup.
 *
 * For every device that was successfully created: unregister it, evict
 * and free its EPP queue page, and free the device structure.
 */
static void tile_net_cleanup(void)
{
	int idx;

	for (idx = 0; idx < TILE_NET_DEVS; idx++) {
		struct net_device *dev = tile_net_devs[idx];
		struct tile_net_priv *priv;

		/* Skip slots whose device was never created. */
		if (!dev)
			continue;

		priv = netdev_priv(dev);
		unregister_netdev(dev);
		finv_buffer(priv->epp_queue, PAGE_SIZE);
		free_page((unsigned long)priv->epp_queue);
		free_netdev(dev);
	}
}
/*
 * Module initialization.
 *
 * Tries to create each of the four known devices.  A device that
 * cannot be created leaves a NULL entry in "tile_net_devs"; the
 * function returns 0 regardless.
 */
static int tile_net_init_module(void)
{
	static const char *const names[] = {
		"xgbe0", "xgbe1", "gbe0", "gbe1"
	};
	unsigned int idx;

	pr_info("Tilera IPP Net Driver\n");

	for (idx = 0; idx < sizeof(names) / sizeof(names[0]); idx++)
		tile_net_devs[idx] = tile_net_dev_init(names[idx]);

	return 0;
}
#ifndef MODULE
/*
 * The "network_cpus" boot argument specifies the cpus that are dedicated
 * to handle ingress packets.
 *
 * The parameter should be in the form "network_cpus=m-n[,x-y]", where
 * m, n, x, y are integer numbers that represent the cpus that can be
 * neither a dedicated cpu nor a dataplane cpu.
 *
 * A malformed or empty list is reported and otherwise ignored.
 * Always returns 0.
 */
static int __init network_cpus_setup(char *str)
{
	char buf[1024];

	if (cpulist_parse_crop(str, &network_cpus_map) != 0) {
		pr_warning("network_cpus=%s: malformed cpu list\n",
		       str);
		return 0;
	}

	/* Remove dedicated cpus. */
	cpumask_and(&network_cpus_map, &network_cpus_map,
		    cpu_possible_mask);

	if (cpumask_empty(&network_cpus_map)) {
		pr_warning("Ignoring network_cpus='%s'.\n",
		       str);
		return 0;
	}

	cpulist_scnprintf(buf, sizeof(buf), &network_cpus_map);
	pr_info("Linux network CPUs: %s\n", buf);
	network_cpus_used = true;

	return 0;
}
__setup("network_cpus=", network_cpus_setup);
#endif
/* Register the module's initialization and cleanup entry points. */
module_init(tile_net_init_module);
module_exit(tile_net_cleanup);
...@@ -49,6 +49,7 @@ obj-$(CONFIG_MIPS) += setup-bus.o setup-irq.o ...@@ -49,6 +49,7 @@ obj-$(CONFIG_MIPS) += setup-bus.o setup-irq.o
obj-$(CONFIG_X86_VISWS) += setup-irq.o obj-$(CONFIG_X86_VISWS) += setup-irq.o
obj-$(CONFIG_MN10300) += setup-bus.o obj-$(CONFIG_MN10300) += setup-bus.o
obj-$(CONFIG_MICROBLAZE) += setup-bus.o obj-$(CONFIG_MICROBLAZE) += setup-bus.o
obj-$(CONFIG_TILE) += setup-bus.o setup-irq.o
# #
# ACPI Related PCI FW Functions # ACPI Related PCI FW Functions
......
...@@ -2136,6 +2136,24 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82865_HB, ...@@ -2136,6 +2136,24 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82865_HB,
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82875_HB, DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82875_HB,
quirk_unhide_mch_dev6); quirk_unhide_mch_dev6);
#ifdef CONFIG_TILE
/*
 * The Tilera TILEmpower platform needs to set the link speed
 * to 2.5GT(Giga-Transfers)/s (Gen 1).  The default link speed
 * setting is 5GT/s (Gen 2).  0x98 is the Link Control2 PCIe
 * capability register of the PEX8624 PCIe switch.  The switch
 * supports link speed auto negotiation, but falsely sets
 * the link speed to 5GT/s.
 *
 * Nothing is done unless "tile_plx_gen1" is set.
 */
static void __devinit quirk_tile_plx_gen1(struct pci_dev *dev)
{
	if (!tile_plx_gen1)
		return;

	pci_write_config_dword(dev, 0x98, 0x1);
	mdelay(50);
}
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_PLX, 0x8624, quirk_tile_plx_gen1);
#endif /* CONFIG_TILE */
#ifdef CONFIG_PCI_MSI #ifdef CONFIG_PCI_MSI
/* Some chipsets do not support MSI. We cannot easily rely on setting /* Some chipsets do not support MSI. We cannot easily rely on setting
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment