aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Hutchings <bhutchings@solarflare.com>2008-04-27 12:55:59 +0100
committerJeff Garzik <jgarzik@redhat.com>2008-04-29 01:42:43 -0400
commit8ceee660aacb29721e26f08e336c58dc4847d1bd (patch)
tree158122642e6f21fe85d072c50d6185a0d0cf6834
parent358c12953b88c5a06a57c33eb27c753b2e7934d1 (diff)
New driver "sfc" for Solarstorm SFC4000 controller.
The driver supports the 10Xpress PHY and XFP modules on our reference designs SFE4001 and SFE4002 and the SMC models SMC10GPCIe-XFP and SMC10GPCIe-10BT. Signed-off-by: Ben Hutchings <bhutchings@solarflare.com> Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
-rw-r--r--MAINTAINERS7
-rw-r--r--drivers/net/Kconfig1
-rw-r--r--drivers/net/Makefile2
-rw-r--r--drivers/net/sfc/Kconfig12
-rw-r--r--drivers/net/sfc/Makefile5
-rw-r--r--drivers/net/sfc/bitfield.h508
-rw-r--r--drivers/net/sfc/boards.c167
-rw-r--r--drivers/net/sfc/boards.h26
-rw-r--r--drivers/net/sfc/efx.c2208
-rw-r--r--drivers/net/sfc/efx.h67
-rw-r--r--drivers/net/sfc/enum.h50
-rw-r--r--drivers/net/sfc/ethtool.c460
-rw-r--r--drivers/net/sfc/ethtool.h27
-rw-r--r--drivers/net/sfc/falcon.c2722
-rw-r--r--drivers/net/sfc/falcon.h130
-rw-r--r--drivers/net/sfc/falcon_hwdefs.h1135
-rw-r--r--drivers/net/sfc/falcon_io.h243
-rw-r--r--drivers/net/sfc/falcon_xmac.c585
-rw-r--r--drivers/net/sfc/gmii.h195
-rw-r--r--drivers/net/sfc/i2c-direct.c381
-rw-r--r--drivers/net/sfc/i2c-direct.h91
-rw-r--r--drivers/net/sfc/mac.h33
-rw-r--r--drivers/net/sfc/mdio_10g.c282
-rw-r--r--drivers/net/sfc/mdio_10g.h232
-rw-r--r--drivers/net/sfc/net_driver.h883
-rw-r--r--drivers/net/sfc/phy.h48
-rw-r--r--drivers/net/sfc/rx.c875
-rw-r--r--drivers/net/sfc/rx.h29
-rw-r--r--drivers/net/sfc/sfe4001.c252
-rw-r--r--drivers/net/sfc/spi.h71
-rw-r--r--drivers/net/sfc/tenxpress.c434
-rw-r--r--drivers/net/sfc/tx.c452
-rw-r--r--drivers/net/sfc/tx.h24
-rw-r--r--drivers/net/sfc/workarounds.h56
-rw-r--r--drivers/net/sfc/xenpack.h62
-rw-r--r--drivers/net/sfc/xfp_phy.c132
36 files changed, 12887 insertions, 0 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 36aadf6003b..2112034e164 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3522,6 +3522,13 @@ M: pfg@sgi.com
L: linux-ia64@vger.kernel.org
S: Supported
+SFC NETWORK DRIVER
+P: Steve Hodgson
+P: Ben Hutchings
+P: Robert Stonehouse
+M: linux-net-drivers@solarflare.com
+S: Supported
+
SGI VISUAL WORKSTATION 320 AND 540
P: Andrey Panin
M: pazke@donpac.ru
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 45c3a208d93..50b36b408ca 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2592,6 +2592,7 @@ config BNX2X
To compile this driver as a module, choose M here: the module
will be called bnx2x. This is recommended.
+source "drivers/net/sfc/Kconfig"
endif # NETDEV_10000
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 4d71729e85e..371cb0785b2 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -252,3 +252,5 @@ obj-$(CONFIG_FS_ENET) += fs_enet/
obj-$(CONFIG_NETXEN_NIC) += netxen/
obj-$(CONFIG_NIU) += niu.o
obj-$(CONFIG_VIRTIO_NET) += virtio_net.o
+obj-$(CONFIG_SFC) += sfc/
+
diff --git a/drivers/net/sfc/Kconfig b/drivers/net/sfc/Kconfig
new file mode 100644
index 00000000000..dbad95c295b
--- /dev/null
+++ b/drivers/net/sfc/Kconfig
@@ -0,0 +1,12 @@
+config SFC
+ tristate "Solarflare Solarstorm SFC4000 support"
+ depends on PCI && INET
+ select MII
+ select INET_LRO
+ select CRC32
+ help
+ This driver supports 10-gigabit Ethernet cards based on
+ the Solarflare Communications Solarstorm SFC4000 controller.
+
+ To compile this driver as a module, choose M here. The module
+ will be called sfc.
diff --git a/drivers/net/sfc/Makefile b/drivers/net/sfc/Makefile
new file mode 100644
index 00000000000..0f023447eaf
--- /dev/null
+++ b/drivers/net/sfc/Makefile
@@ -0,0 +1,5 @@
+sfc-y += efx.o falcon.o tx.o rx.o falcon_xmac.o \
+ i2c-direct.o ethtool.o xfp_phy.o mdio_10g.o \
+ tenxpress.o boards.o sfe4001.o
+
+obj-$(CONFIG_SFC) += sfc.o
diff --git a/drivers/net/sfc/bitfield.h b/drivers/net/sfc/bitfield.h
new file mode 100644
index 00000000000..2806201644c
--- /dev/null
+++ b/drivers/net/sfc/bitfield.h
@@ -0,0 +1,508 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2008 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_BITFIELD_H
+#define EFX_BITFIELD_H
+
+/*
+ * Efx bitfield access
+ *
+ * Efx NICs make extensive use of bitfields up to 128 bits
+ * wide. Since there is no native 128-bit datatype on most systems,
+ * and since 64-bit datatypes are inefficient on 32-bit systems and
+ * vice versa, we wrap accesses in a way that uses the most efficient
+ * datatype.
+ *
+ * The NICs are PCI devices and therefore little-endian. Since most
+ * of the quantities that we deal with are DMAed to/from host memory,
+ * we define our datatypes (efx_oword_t, efx_qword_t and
+ * efx_dword_t) to be little-endian.
+ */
+
+/* Lowest bit numbers and widths */
+#define EFX_DUMMY_FIELD_LBN 0
+#define EFX_DUMMY_FIELD_WIDTH 0
+#define EFX_DWORD_0_LBN 0
+#define EFX_DWORD_0_WIDTH 32
+#define EFX_DWORD_1_LBN 32
+#define EFX_DWORD_1_WIDTH 32
+#define EFX_DWORD_2_LBN 64
+#define EFX_DWORD_2_WIDTH 32
+#define EFX_DWORD_3_LBN 96
+#define EFX_DWORD_3_WIDTH 32
+
+/* Specified attribute (e.g. LBN) of the specified field */
+#define EFX_VAL(field, attribute) field ## _ ## attribute
+/* Low bit number of the specified field */
+#define EFX_LOW_BIT(field) EFX_VAL(field, LBN)
+/* Bit width of the specified field */
+#define EFX_WIDTH(field) EFX_VAL(field, WIDTH)
+/* High bit number of the specified field */
+#define EFX_HIGH_BIT(field) (EFX_LOW_BIT(field) + EFX_WIDTH(field) - 1)
+/* Mask equal in width to the specified field.
+ *
+ * For example, a field with width 5 would have a mask of 0x1f.
+ *
+ * The maximum width mask that can be generated is 64 bits.
+ */
+#define EFX_MASK64(field) \
+ (EFX_WIDTH(field) == 64 ? ~((u64) 0) : \
+ (((((u64) 1) << EFX_WIDTH(field))) - 1))
+
+/* Mask equal in width to the specified field.
+ *
+ * For example, a field with width 5 would have a mask of 0x1f.
+ *
+ * The maximum width mask that can be generated is 32 bits. Use
+ * EFX_MASK64 for higher width fields.
+ */
+#define EFX_MASK32(field) \
+ (EFX_WIDTH(field) == 32 ? ~((u32) 0) : \
+ (((((u32) 1) << EFX_WIDTH(field))) - 1))
+
+/* A doubleword (i.e. 4 byte) datatype - little-endian in HW */
+typedef union efx_dword {
+ __le32 u32[1];
+} efx_dword_t;
+
+/* A quadword (i.e. 8 byte) datatype - little-endian in HW */
+typedef union efx_qword {
+ __le64 u64[1];
+ __le32 u32[2];
+ efx_dword_t dword[2];
+} efx_qword_t;
+
+/* An octword (eight-word, i.e. 16 byte) datatype - little-endian in HW */
+typedef union efx_oword {
+ __le64 u64[2];
+ efx_qword_t qword[2];
+ __le32 u32[4];
+ efx_dword_t dword[4];
+} efx_oword_t;
+
+/* Format string and value expanders for printk */
+#define EFX_DWORD_FMT "%08x"
+#define EFX_QWORD_FMT "%08x:%08x"
+#define EFX_OWORD_FMT "%08x:%08x:%08x:%08x"
+#define EFX_DWORD_VAL(dword) \
+ ((unsigned int) le32_to_cpu((dword).u32[0]))
+#define EFX_QWORD_VAL(qword) \
+ ((unsigned int) le32_to_cpu((qword).u32[1])), \
+ ((unsigned int) le32_to_cpu((qword).u32[0]))
+#define EFX_OWORD_VAL(oword) \
+ ((unsigned int) le32_to_cpu((oword).u32[3])), \
+ ((unsigned int) le32_to_cpu((oword).u32[2])), \
+ ((unsigned int) le32_to_cpu((oword).u32[1])), \
+ ((unsigned int) le32_to_cpu((oword).u32[0]))
+
+/*
+ * Extract bit field portion [low,high] from the native-endian element
+ * which contains bits [min,max] (all bounds are inclusive).
+ *
+ * For example, suppose "element" represents the high 32 bits of a
+ * 64-bit value, and we wish to extract the bits belonging to the bit
+ * field occupying bits 28-45 of this 64-bit value.
+ *
+ * Then EFX_EXTRACT_NATIVE ( element, 32, 63, 28, 45 ) would give
+ *
+ * ( element ) << 4
+ *
+ * The result will contain the relevant bits filled in in the range
+ * [0,high-low], with garbage in bits [high-low+1,...).
+ */
+#define EFX_EXTRACT_NATIVE(native_element, min, max, low, high) \
+ (((low > max) || (high < min)) ? 0 : \
+ ((low > min) ? \
+ ((native_element) >> (low - min)) : \
+ ((native_element) << (min - low))))
+
+/*
+ * Extract bit field portion [low,high] from the 64-bit little-endian
+ * element which contains bits [min,max] (bounds inclusive)
+ */
+#define EFX_EXTRACT64(element, min, max, low, high) \
+ EFX_EXTRACT_NATIVE(le64_to_cpu(element), min, max, low, high)
+
+/*
+ * Extract bit field portion [low,high] from the 32-bit little-endian
+ * element which contains bits [min,max] (bounds inclusive)
+ */
+#define EFX_EXTRACT32(element, min, max, low, high) \
+ EFX_EXTRACT_NATIVE(le32_to_cpu(element), min, max, low, high)
+
+#define EFX_EXTRACT_OWORD64(oword, low, high) \
+ (EFX_EXTRACT64((oword).u64[0], 0, 63, low, high) | \
+ EFX_EXTRACT64((oword).u64[1], 64, 127, low, high))
+
+#define EFX_EXTRACT_QWORD64(qword, low, high) \
+ EFX_EXTRACT64((qword).u64[0], 0, 63, low, high)
+
+#define EFX_EXTRACT_OWORD32(oword, low, high) \
+ (EFX_EXTRACT32((oword).u32[0], 0, 31, low, high) | \
+ EFX_EXTRACT32((oword).u32[1], 32, 63, low, high) | \
+ EFX_EXTRACT32((oword).u32[2], 64, 95, low, high) | \
+ EFX_EXTRACT32((oword).u32[3], 96, 127, low, high))
+
+#define EFX_EXTRACT_QWORD32(qword, low, high) \
+ (EFX_EXTRACT32((qword).u32[0], 0, 31, low, high) | \
+ EFX_EXTRACT32((qword).u32[1], 32, 63, low, high))
+
+#define EFX_EXTRACT_DWORD(dword, low, high) \
+ EFX_EXTRACT32((dword).u32[0], 0, 31, low, high)
+
+#define EFX_OWORD_FIELD64(oword, field) \
+ (EFX_EXTRACT_OWORD64(oword, EFX_LOW_BIT(field), EFX_HIGH_BIT(field)) \
+ & EFX_MASK64(field))
+
+#define EFX_QWORD_FIELD64(qword, field) \
+ (EFX_EXTRACT_QWORD64(qword, EFX_LOW_BIT(field), EFX_HIGH_BIT(field)) \
+ & EFX_MASK64(field))
+
+#define EFX_OWORD_FIELD32(oword, field) \
+ (EFX_EXTRACT_OWORD32(oword, EFX_LOW_BIT(field), EFX_HIGH_BIT(field)) \
+ & EFX_MASK32(field))
+
+#define EFX_QWORD_FIELD32(qword, field) \
+ (EFX_EXTRACT_QWORD32(qword, EFX_LOW_BIT(field), EFX_HIGH_BIT(field)) \
+ & EFX_MASK32(field))
+
+#define EFX_DWORD_FIELD(dword, field) \
+ (EFX_EXTRACT_DWORD(dword, EFX_LOW_BIT(field), EFX_HIGH_BIT(field)) \
+ & EFX_MASK32(field))
+
+#define EFX_OWORD_IS_ZERO64(oword) \
+ (((oword).u64[0] | (oword).u64[1]) == (__force __le64) 0)
+
+#define EFX_QWORD_IS_ZERO64(qword) \
+ (((qword).u64[0]) == (__force __le64) 0)
+
+#define EFX_OWORD_IS_ZERO32(oword) \
+ (((oword).u32[0] | (oword).u32[1] | (oword).u32[2] | (oword).u32[3]) \
+ == (__force __le32) 0)
+
+#define EFX_QWORD_IS_ZERO32(qword) \
+ (((qword).u32[0] | (qword).u32[1]) == (__force __le32) 0)
+
+#define EFX_DWORD_IS_ZERO(dword) \
+ (((dword).u32[0]) == (__force __le32) 0)
+
+#define EFX_OWORD_IS_ALL_ONES64(oword) \
+ (((oword).u64[0] & (oword).u64[1]) == ~((__force __le64) 0))
+
+#define EFX_QWORD_IS_ALL_ONES64(qword) \
+ ((qword).u64[0] == ~((__force __le64) 0))
+
+#define EFX_OWORD_IS_ALL_ONES32(oword) \
+ (((oword).u32[0] & (oword).u32[1] & (oword).u32[2] & (oword).u32[3]) \
+ == ~((__force __le32) 0))
+
+#define EFX_QWORD_IS_ALL_ONES32(qword) \
+ (((qword).u32[0] & (qword).u32[1]) == ~((__force __le32) 0))
+
+#define EFX_DWORD_IS_ALL_ONES(dword) \
+ ((dword).u32[0] == ~((__force __le32) 0))
+
+#if BITS_PER_LONG == 64
+#define EFX_OWORD_FIELD EFX_OWORD_FIELD64
+#define EFX_QWORD_FIELD EFX_QWORD_FIELD64
+#define EFX_OWORD_IS_ZERO EFX_OWORD_IS_ZERO64
+#define EFX_QWORD_IS_ZERO EFX_QWORD_IS_ZERO64
+#define EFX_OWORD_IS_ALL_ONES EFX_OWORD_IS_ALL_ONES64
+#define EFX_QWORD_IS_ALL_ONES EFX_QWORD_IS_ALL_ONES64
+#else
+#define EFX_OWORD_FIELD EFX_OWORD_FIELD32
+#define EFX_QWORD_FIELD EFX_QWORD_FIELD32
+#define EFX_OWORD_IS_ZERO EFX_OWORD_IS_ZERO32
+#define EFX_QWORD_IS_ZERO EFX_QWORD_IS_ZERO32
+#define EFX_OWORD_IS_ALL_ONES EFX_OWORD_IS_ALL_ONES32
+#define EFX_QWORD_IS_ALL_ONES EFX_QWORD_IS_ALL_ONES32
+#endif
+
+/*
+ * Construct bit field portion
+ *
+ * Creates the portion of the bit field [low,high] that lies within
+ * the range [min,max] (bounds inclusive).
+ */
+#define EFX_INSERT_NATIVE64(min, max, low, high, value) \
+ (((low > max) || (high < min)) ? 0 : \
+ ((low > min) ? \
+ (((u64) (value)) << (low - min)) : \
+ (((u64) (value)) >> (min - low))))
+
+#define EFX_INSERT_NATIVE32(min, max, low, high, value) \
+ (((low > max) || (high < min)) ? 0 : \
+ ((low > min) ? \
+ (((u32) (value)) << (low - min)) : \
+ (((u32) (value)) >> (min - low))))
+
+#define EFX_INSERT_NATIVE(min, max, low, high, value) \
+ ((((max - min) >= 32) || ((high - low) >= 32)) ? \
+ EFX_INSERT_NATIVE64(min, max, low, high, value) : \
+ EFX_INSERT_NATIVE32(min, max, low, high, value))
+
+/*
+ * Construct bit field portion
+ *
+ * Creates the portion of the named bit field that lies within the
+ * range [min,max] (bounds inclusive).
+ */
+#define EFX_INSERT_FIELD_NATIVE(min, max, field, value) \
+ EFX_INSERT_NATIVE(min, max, EFX_LOW_BIT(field), \
+ EFX_HIGH_BIT(field), value)
+
+/*
+ * Construct bit field
+ *
+ * Creates the portion of the named bit fields that lie within the
+ * range [min,max] (bounds inclusive).
+ */
+#define EFX_INSERT_FIELDS_NATIVE(min, max, \
+ field1, value1, \
+ field2, value2, \
+ field3, value3, \
+ field4, value4, \
+ field5, value5, \
+ field6, value6, \
+ field7, value7, \
+ field8, value8, \
+ field9, value9, \
+ field10, value10) \
+ (EFX_INSERT_FIELD_NATIVE((min), (max), field1, (value1)) | \
+ EFX_INSERT_FIELD_NATIVE((min), (max), field2, (value2)) | \
+ EFX_INSERT_FIELD_NATIVE((min), (max), field3, (value3)) | \
+ EFX_INSERT_FIELD_NATIVE((min), (max), field4, (value4)) | \
+ EFX_INSERT_FIELD_NATIVE((min), (max), field5, (value5)) | \
+ EFX_INSERT_FIELD_NATIVE((min), (max), field6, (value6)) | \
+ EFX_INSERT_FIELD_NATIVE((min), (max), field7, (value7)) | \
+ EFX_INSERT_FIELD_NATIVE((min), (max), field8, (value8)) | \
+ EFX_INSERT_FIELD_NATIVE((min), (max), field9, (value9)) | \
+ EFX_INSERT_FIELD_NATIVE((min), (max), field10, (value10)))
+
+#define EFX_INSERT_FIELDS64(...) \
+ cpu_to_le64(EFX_INSERT_FIELDS_NATIVE(__VA_ARGS__))
+
+#define EFX_INSERT_FIELDS32(...) \
+ cpu_to_le32(EFX_INSERT_FIELDS_NATIVE(__VA_ARGS__))
+
+#define EFX_POPULATE_OWORD64(oword, ...) do { \
+ (oword).u64[0] = EFX_INSERT_FIELDS64(0, 63, __VA_ARGS__); \
+ (oword).u64[1] = EFX_INSERT_FIELDS64(64, 127, __VA_ARGS__); \
+ } while (0)
+
+#define EFX_POPULATE_QWORD64(qword, ...) do { \
+ (qword).u64[0] = EFX_INSERT_FIELDS64(0, 63, __VA_ARGS__); \
+ } while (0)
+
+#define EFX_POPULATE_OWORD32(oword, ...) do { \
+ (oword).u32[0] = EFX_INSERT_FIELDS32(0, 31, __VA_ARGS__); \
+ (oword).u32[1] = EFX_INSERT_FIELDS32(32, 63, __VA_ARGS__); \
+ (oword).u32[2] = EFX_INSERT_FIELDS32(64, 95, __VA_ARGS__); \
+ (oword).u32[3] = EFX_INSERT_FIELDS32(96, 127, __VA_ARGS__); \
+ } while (0)
+
+#define EFX_POPULATE_QWORD32(qword, ...) do { \
+ (qword).u32[0] = EFX_INSERT_FIELDS32(0, 31, __VA_ARGS__); \
+ (qword).u32[1] = EFX_INSERT_FIELDS32(32, 63, __VA_ARGS__); \
+ } while (0)
+
+#define EFX_POPULATE_DWORD(dword, ...) do { \
+ (dword).u32[0] = EFX_INSERT_FIELDS32(0, 31, __VA_ARGS__); \
+ } while (0)
+
+#if BITS_PER_LONG == 64
+#define EFX_POPULATE_OWORD EFX_POPULATE_OWORD64
+#define EFX_POPULATE_QWORD EFX_POPULATE_QWORD64
+#else
+#define EFX_POPULATE_OWORD EFX_POPULATE_OWORD32
+#define EFX_POPULATE_QWORD EFX_POPULATE_QWORD32
+#endif
+
+/* Populate an octword field with various numbers of arguments */
+#define EFX_POPULATE_OWORD_10 EFX_POPULATE_OWORD
+#define EFX_POPULATE_OWORD_9(oword, ...) \
+ EFX_POPULATE_OWORD_10(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_POPULATE_OWORD_8(oword, ...) \
+ EFX_POPULATE_OWORD_9(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_POPULATE_OWORD_7(oword, ...) \
+ EFX_POPULATE_OWORD_8(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_POPULATE_OWORD_6(oword, ...) \
+ EFX_POPULATE_OWORD_7(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_POPULATE_OWORD_5(oword, ...) \
+ EFX_POPULATE_OWORD_6(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_POPULATE_OWORD_4(oword, ...) \
+ EFX_POPULATE_OWORD_5(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_POPULATE_OWORD_3(oword, ...) \
+ EFX_POPULATE_OWORD_4(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_POPULATE_OWORD_2(oword, ...) \
+ EFX_POPULATE_OWORD_3(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_POPULATE_OWORD_1(oword, ...) \
+ EFX_POPULATE_OWORD_2(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_ZERO_OWORD(oword) \
+ EFX_POPULATE_OWORD_1(oword, EFX_DUMMY_FIELD, 0)
+#define EFX_SET_OWORD(oword) \
+ EFX_POPULATE_OWORD_4(oword, \
+ EFX_DWORD_0, 0xffffffff, \
+ EFX_DWORD_1, 0xffffffff, \
+ EFX_DWORD_2, 0xffffffff, \
+ EFX_DWORD_3, 0xffffffff)
+
+/* Populate a quadword field with various numbers of arguments */
+#define EFX_POPULATE_QWORD_10 EFX_POPULATE_QWORD
+#define EFX_POPULATE_QWORD_9(qword, ...) \
+ EFX_POPULATE_QWORD_10(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_POPULATE_QWORD_8(qword, ...) \
+ EFX_POPULATE_QWORD_9(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_POPULATE_QWORD_7(qword, ...) \
+ EFX_POPULATE_QWORD_8(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_POPULATE_QWORD_6(qword, ...) \
+ EFX_POPULATE_QWORD_7(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_POPULATE_QWORD_5(qword, ...) \
+ EFX_POPULATE_QWORD_6(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_POPULATE_QWORD_4(qword, ...) \
+ EFX_POPULATE_QWORD_5(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_POPULATE_QWORD_3(qword, ...) \
+ EFX_POPULATE_QWORD_4(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_POPULATE_QWORD_2(qword, ...) \
+ EFX_POPULATE_QWORD_3(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_POPULATE_QWORD_1(qword, ...) \
+ EFX_POPULATE_QWORD_2(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_ZERO_QWORD(qword) \
+ EFX_POPULATE_QWORD_1(qword, EFX_DUMMY_FIELD, 0)
+#define EFX_SET_QWORD(qword) \
+ EFX_POPULATE_QWORD_2(qword, \
+ EFX_DWORD_0, 0xffffffff, \
+ EFX_DWORD_1, 0xffffffff)
+
+/* Populate a dword field with various numbers of arguments */
+#define EFX_POPULATE_DWORD_10 EFX_POPULATE_DWORD
+#define EFX_POPULATE_DWORD_9(dword, ...) \
+ EFX_POPULATE_DWORD_10(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_POPULATE_DWORD_8(dword, ...) \
+ EFX_POPULATE_DWORD_9(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_POPULATE_DWORD_7(dword, ...) \
+ EFX_POPULATE_DWORD_8(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_POPULATE_DWORD_6(dword, ...) \
+ EFX_POPULATE_DWORD_7(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_POPULATE_DWORD_5(dword, ...) \
+ EFX_POPULATE_DWORD_6(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_POPULATE_DWORD_4(dword, ...) \
+ EFX_POPULATE_DWORD_5(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_POPULATE_DWORD_3(dword, ...) \
+ EFX_POPULATE_DWORD_4(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_POPULATE_DWORD_2(dword, ...) \
+ EFX_POPULATE_DWORD_3(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_POPULATE_DWORD_1(dword, ...) \
+ EFX_POPULATE_DWORD_2(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EFX_ZERO_DWORD(dword) \
+ EFX_POPULATE_DWORD_1(dword, EFX_DUMMY_FIELD, 0)
+#define EFX_SET_DWORD(dword) \
+ EFX_POPULATE_DWORD_1(dword, EFX_DWORD_0, 0xffffffff)
+
+/*
+ * Modify a named field within an already-populated structure. Used
+ * for read-modify-write operations.
+ *
+ */
+
+#define EFX_INVERT_OWORD(oword) do { \
+ (oword).u64[0] = ~((oword).u64[0]); \
+ (oword).u64[1] = ~((oword).u64[1]); \
+ } while (0)
+
+#define EFX_INSERT_FIELD64(...) \
+ cpu_to_le64(EFX_INSERT_FIELD_NATIVE(__VA_ARGS__))
+
+#define EFX_INSERT_FIELD32(...) \
+ cpu_to_le32(EFX_INSERT_FIELD_NATIVE(__VA_ARGS__))
+
+#define EFX_INPLACE_MASK64(min, max, field) \
+ EFX_INSERT_FIELD64(min, max, field, EFX_MASK64(field))
+
+#define EFX_INPLACE_MASK32(min, max, field) \
+ EFX_INSERT_FIELD32(min, max, field, EFX_MASK32(field))
+
+#define EFX_SET_OWORD_FIELD64(oword, field, value) do { \
+ (oword).u64[0] = (((oword).u64[0] \
+ & ~EFX_INPLACE_MASK64(0, 63, field)) \
+ | EFX_INSERT_FIELD64(0, 63, field, value)); \
+ (oword).u64[1] = (((oword).u64[1] \
+ & ~EFX_INPLACE_MASK64(64, 127, field)) \
+ | EFX_INSERT_FIELD64(64, 127, field, value)); \
+ } while (0)
+
+#define EFX_SET_QWORD_FIELD64(qword, field, value) do { \
+ (qword).u64[0] = (((qword).u64[0] \
+ & ~EFX_INPLACE_MASK64(0, 63, field)) \
+ | EFX_INSERT_FIELD64(0, 63, field, value)); \
+ } while (0)
+
+#define EFX_SET_OWORD_FIELD32(oword, field, value) do { \
+ (oword).u32[0] = (((oword).u32[0] \
+ & ~EFX_INPLACE_MASK32(0, 31, field)) \
+ | EFX_INSERT_FIELD32(0, 31, field, value)); \
+ (oword).u32[1] = (((oword).u32[1] \
+ & ~EFX_INPLACE_MASK32(32, 63, field)) \
+ | EFX_INSERT_FIELD32(32, 63, field, value)); \
+ (oword).u32[2] = (((oword).u32[2] \
+ & ~EFX_INPLACE_MASK32(64, 95, field)) \
+ | EFX_INSERT_FIELD32(64, 95, field, value)); \
+ (oword).u32[3] = (((oword).u32[3] \
+ & ~EFX_INPLACE_MASK32(96, 127, field)) \
+ | EFX_INSERT_FIELD32(96, 127, field, value)); \
+ } while (0)
+
+#define EFX_SET_QWORD_FIELD32(qword, field, value) do { \
+ (qword).u32[0] = (((qword).u32[0] \
+ & ~EFX_INPLACE_MASK32(0, 31, field)) \
+ | EFX_INSERT_FIELD32(0, 31, field, value)); \
+ (qword).u32[1] = (((qword).u32[1] \
+ & ~EFX_INPLACE_MASK32(32, 63, field)) \
+ | EFX_INSERT_FIELD32(32, 63, field, value)); \
+ } while (0)
+
+#define EFX_SET_DWORD_FIELD(dword, field, value) do { \
+ (dword).u32[0] = (((dword).u32[0] \
+ & ~EFX_INPLACE_MASK32(0, 31, field)) \
+ | EFX_INSERT_FIELD32(0, 31, field, value)); \
+ } while (0)
+
+#if BITS_PER_LONG == 64
+#define EFX_SET_OWORD_FIELD EFX_SET_OWORD_FIELD64
+#define EFX_SET_QWORD_FIELD EFX_SET_QWORD_FIELD64
+#else
+#define EFX_SET_OWORD_FIELD EFX_SET_OWORD_FIELD32
+#define EFX_SET_QWORD_FIELD EFX_SET_QWORD_FIELD32
+#endif
+
+#define EFX_SET_OWORD_FIELD_VER(efx, oword, field, value) do { \
+ if (FALCON_REV(efx) >= FALCON_REV_B0) { \
+ EFX_SET_OWORD_FIELD((oword), field##_B0, (value)); \
+ } else { \
+ EFX_SET_OWORD_FIELD((oword), field##_A1, (value)); \
+ } \
+} while (0)
+
+#define EFX_QWORD_FIELD_VER(efx, qword, field) \
+ (FALCON_REV(efx) >= FALCON_REV_B0 ? \
+ EFX_QWORD_FIELD((qword), field##_B0) : \
+ EFX_QWORD_FIELD((qword), field##_A1))
+
+/* Used to avoid compiler warnings about shift range exceeding width
+ * of the data types when dma_addr_t is only 32 bits wide.
+ */
+#define DMA_ADDR_T_WIDTH (8 * sizeof(dma_addr_t))
+#define EFX_DMA_TYPE_WIDTH(width) \
+ (((width) < DMA_ADDR_T_WIDTH) ? (width) : DMA_ADDR_T_WIDTH)
+#define EFX_DMA_MAX_MASK ((DMA_ADDR_T_WIDTH == 64) ? \
+ ~((u64) 0) : ~((u32) 0))
+#define EFX_DMA_MASK(mask) ((mask) & EFX_DMA_MAX_MASK)
+
+#endif /* EFX_BITFIELD_H */
diff --git a/drivers/net/sfc/boards.c b/drivers/net/sfc/boards.c
new file mode 100644
index 00000000000..eecaa6d5858
--- /dev/null
+++ b/drivers/net/sfc/boards.c
@@ -0,0 +1,167 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2007 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include "net_driver.h"
+#include "phy.h"
+#include "boards.h"
+#include "efx.h"
+
+/* Macros for unpacking the board revision */
+/* The revision info is in host byte order. */
+#define BOARD_TYPE(_rev) (_rev >> 8)
+#define BOARD_MAJOR(_rev) ((_rev >> 4) & 0xf)
+#define BOARD_MINOR(_rev) (_rev & 0xf)
+
+/* Blink support. If the PHY has no auto-blink mode, we hang it off a timer */
+#define BLINK_INTERVAL (HZ/2)
+
+static void blink_led_timer(unsigned long context)
+{
+ struct efx_nic *efx = (struct efx_nic *)context;
+ struct efx_blinker *bl = &efx->board_info.blinker;
+ efx->board_info.set_fault_led(efx, bl->state);
+ bl->state = !bl->state;
+ if (bl->resubmit) {
+ bl->timer.expires = jiffies + BLINK_INTERVAL;
+ add_timer(&bl->timer);
+ }
+}
+
+static void board_blink(struct efx_nic *efx, int blink)
+{
+ struct efx_blinker *blinker = &efx->board_info.blinker;
+
+ /* The rtnl mutex serialises all ethtool ioctls, so
+ * nothing special needs doing here. */
+ if (blink) {
+ blinker->resubmit = 1;
+ blinker->state = 0;
+ setup_timer(&blinker->timer, blink_led_timer,
+ (unsigned long)efx);
+ blinker->timer.expires = jiffies + BLINK_INTERVAL;
+ add_timer(&blinker->timer);
+ } else {
+ blinker->resubmit = 0;
+ if (blinker->timer.function)
+ del_timer_sync(&blinker->timer);
+ efx->board_info.set_fault_led(efx, 0);
+ }
+}
+
+/*****************************************************************************
+ * Support for the SFE4002
+ *
+ */
+/****************************************************************************/
+/* LED allocations. Note that on rev A0 boards the schematic and the reality
+ * differ: red and green are swapped. Below is the fixed (A1) layout (there
+ * are only 3 A0 boards in existence, so no real reason to make this
+ * conditional).
+ */
+#define SFE4002_FAULT_LED (2) /* Red */
+#define SFE4002_RX_LED (0) /* Green */
+#define SFE4002_TX_LED (1) /* Amber */
+
+static int sfe4002_init_leds(struct efx_nic *efx)
+{
+ /* Set the TX and RX LEDs to reflect status and activity, and the
+ * fault LED off */
+ xfp_set_led(efx, SFE4002_TX_LED,
+ QUAKE_LED_TXLINK | QUAKE_LED_LINK_ACTSTAT);
+ xfp_set_led(efx, SFE4002_RX_LED,
+ QUAKE_LED_RXLINK | QUAKE_LED_LINK_ACTSTAT);
+ xfp_set_led(efx, SFE4002_FAULT_LED, QUAKE_LED_OFF);
+ efx->board_info.blinker.led_num = SFE4002_FAULT_LED;
+ return 0;
+}
+
+static void sfe4002_fault_led(struct efx_nic *efx, int state)
+{
+ xfp_set_led(efx, SFE4002_FAULT_LED, state ? QUAKE_LED_ON :
+ QUAKE_LED_OFF);
+}
+
+static int sfe4002_init(struct efx_nic *efx)
+{
+ efx->board_info.init_leds = sfe4002_init_leds;
+ efx->board_info.set_fault_led = sfe4002_fault_led;
+ efx->board_info.blink = board_blink;
+ return 0;
+}
+
+/* This will get expanded as board-specific details get moved out of the
+ * PHY drivers. */
+struct efx_board_data {
+ const char *ref_model;
+ const char *gen_type;
+ int (*init) (struct efx_nic *nic);
+};
+
+static int dummy_init(struct efx_nic *nic)
+{
+ return 0;
+}
+
+static struct efx_board_data board_data[] = {
+ [EFX_BOARD_INVALID] =
+ {NULL, NULL, dummy_init},
+ [EFX_BOARD_SFE4001] =
+ {"SFE4001", "10GBASE-T adapter", sfe4001_poweron},
+ [EFX_BOARD_SFE4002] =
+ {"SFE4002", "XFP adapter", sfe4002_init},
+};
+
+int efx_set_board_info(struct efx_nic *efx, u16 revision_info)
+{
+ int rc = 0;
+ struct efx_board_data *data;
+
+ if (BOARD_TYPE(revision_info) >= EFX_BOARD_MAX) {
+ EFX_ERR(efx, "squashing unknown board type %d\n",
+ BOARD_TYPE(revision_info));
+ revision_info = 0;
+ }
+
+ if (BOARD_TYPE(revision_info) == 0) {
+ efx->board_info.major = 0;
+ efx->board_info.minor = 0;
+ /* For early boards that don't have revision info, there is
+ * only 1 board for each PHY type, so we can work it out, with
+ * the exception of the PHY-less boards. */
+ switch (efx->phy_type) {
+ case PHY_TYPE_10XPRESS:
+ efx->board_info.type = EFX_BOARD_SFE4001;
+ break;
+ case PHY_TYPE_XFP:
+ efx->board_info.type = EFX_BOARD_SFE4002;
+ break;
+ default:
+ efx->board_info.type = 0;
+ break;
+ }
+ } else {
+ efx->board_info.type = BOARD_TYPE(revision_info);
+ efx->board_info.major = BOARD_MAJOR(revision_info);
+ efx->board_info.minor = BOARD_MINOR(revision_info);
+ }
+
+ data = &board_data[efx->board_info.type];
+
+ /* Report the board model number or generic type for recognisable
+ * boards. */
+ if (efx->board_info.type != 0)
+ EFX_INFO(efx, "board is %s rev %c%d\n",
+ (efx->pci_dev->subsystem_vendor == EFX_VENDID_SFC)
+ ? data->ref_model : data->gen_type,
+ 'A' + efx->board_info.major, efx->board_info.minor);
+
+ efx->board_info.init = data->init;
+
+ return rc;
+}
diff --git a/drivers/net/sfc/boards.h b/drivers/net/sfc/boards.h
new file mode 100644
index 00000000000..f56341d428e
--- /dev/null
+++ b/drivers/net/sfc/boards.h
@@ -0,0 +1,26 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2007 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_BOARDS_H
+#define EFX_BOARDS_H
+
+/* Board IDs (must fit in 8 bits) */
+enum efx_board_type {
+ EFX_BOARD_INVALID = 0,
+ EFX_BOARD_SFE4001 = 1, /* SFE4001 (10GBASE-T) */
+ EFX_BOARD_SFE4002 = 2,
+ /* Insert new types before here */
+ EFX_BOARD_MAX
+};
+
+extern int efx_set_board_info(struct efx_nic *efx, u16 revision_info);
+extern int sfe4001_poweron(struct efx_nic *efx);
+extern void sfe4001_poweroff(struct efx_nic *efx);
+
+#endif
diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c
new file mode 100644
index 00000000000..59edcf793c1
--- /dev/null
+++ b/drivers/net/sfc/efx.c
@@ -0,0 +1,2208 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2005-2008 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/delay.h>
+#include <linux/notifier.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/in.h>
+#include <linux/crc32.h>
+#include <linux/ethtool.h>
+#include "net_driver.h"
+#include "gmii.h"
+#include "ethtool.h"
+#include "tx.h"
+#include "rx.h"
+#include "efx.h"
+#include "mdio_10g.h"
+#include "falcon.h"
+#include "workarounds.h"
+#include "mac.h"
+
+#define EFX_MAX_MTU (9 * 1024)
+
+/* RX slow fill workqueue. If memory allocation fails in the fast path,
+ * a work item is pushed onto this work queue to retry the allocation later,
+ * to avoid the NIC being starved of RX buffers. Since this is a per cpu
+ * workqueue, there is nothing to be gained in making it per NIC
+ */
+static struct workqueue_struct *refill_workqueue;
+
+/**************************************************************************
+ *
+ * Configurable values
+ *
+ *************************************************************************/
+
+/*
+ * Enable large receive offload (LRO) aka soft segment reassembly (SSR)
+ *
+ * This sets the default for new devices. It can be controlled later
+ * using ethtool.
+ */
+static int lro = 1;
+module_param(lro, int, 0644);
+MODULE_PARM_DESC(lro, "Large receive offload acceleration");
+
+/*
+ * Use separate channels for TX and RX events
+ *
+ * Set this to 1 to use separate channels for TX and RX. It allows us to
+ * apply a higher level of interrupt moderation to TX events.
+ *
+ * This is forced to 0 for MSI interrupt mode as the interrupt vector
+ * is not written
+ */
+static unsigned int separate_tx_and_rx_channels = 1;
+
+/* This is the weight assigned to each of the (per-channel) virtual
+ * NAPI devices.
+ */
+static int napi_weight = 64;
+
+/* This is the time (in jiffies) between invocations of the hardware
+ * monitor, which checks for known hardware bugs and resets the
+ * hardware and driver as necessary.
+ */
+unsigned int efx_monitor_interval = 1 * HZ;
+
+/* This controls whether or not the hardware monitor will trigger a
+ * reset when it detects an error condition.
+ */
+static unsigned int monitor_reset = 1;
+
+/* This controls whether or not the driver will initialise devices
+ * with invalid MAC addresses stored in the EEPROM or flash. If true,
+ * such devices will be initialised with a random locally-generated
+ * MAC address. This allows for loading the sfc_mtd driver to
+ * reprogram the flash, even if the flash contents (including the MAC
+ * address) have previously been erased.
+ */
+static unsigned int allow_bad_hwaddr;
+
+/* Initial interrupt moderation settings. They can be modified after
+ * module load with ethtool.
+ *
+ * The default for RX should strike a balance between increasing the
+ * round-trip latency and reducing overhead.
+ */
+static unsigned int rx_irq_mod_usec = 60;
+
+/* Initial interrupt moderation settings. They can be modified after
+ * module load with ethtool.
+ *
+ * This default is chosen to ensure that a 10G link does not go idle
+ * while a TX queue is stopped after it has become full. A queue is
+ * restarted when it drops below half full. The time this takes (assuming
+ * worst case 3 descriptors per packet and 1024 descriptors) is
+ * 512 / 3 * 1.2 = 205 usec.
+ */
+static unsigned int tx_irq_mod_usec = 150;
+
+/* This is the first interrupt mode to try out of:
+ * 0 => MSI-X
+ * 1 => MSI
+ * 2 => legacy
+ */
+static unsigned int interrupt_mode;
+
+/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS),
+ * i.e. the number of CPUs among which we may distribute simultaneous
+ * interrupt handling.
+ *
+ * Cards without MSI-X will only target one CPU via legacy or MSI interrupt.
+ * The default (0) means to use one RSS queue per online CPU; in either case
+ * efx_probe_interrupts() caps the result at the number of channels available.
+ */
+static unsigned int rss_cpus;
+module_param(rss_cpus, uint, 0444);
+MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling");
+
+/**************************************************************************
+ *
+ * Utility functions and prototypes
+ *
+ *************************************************************************/
+static void efx_remove_channel(struct efx_channel *channel);
+static void efx_remove_port(struct efx_nic *efx);
+static void efx_fini_napi(struct efx_nic *efx);
+static void efx_fini_channels(struct efx_nic *efx);
+
+/* Assert that the RTNL lock is held whenever the NIC is in STATE_RUNNING
+ * or STATE_RESETTING. No check is made in any other state. */
+#define EFX_ASSERT_RESET_SERIALISED(efx) \
+ do { \
+ if ((efx->state == STATE_RUNNING) || \
+ (efx->state == STATE_RESETTING)) \
+ ASSERT_RTNL(); \
+ } while (0)
+
+/**************************************************************************
+ *
+ * Event queue processing
+ *
+ *************************************************************************/
+
+/* Process channel's event queue
+ *
+ * This function is responsible for processing the event queue of a
+ * single channel. The caller must guarantee that this function will
+ * never be concurrently called more than once on the same channel,
+ * though different channels may be being processed concurrently.
+ *
+ * Returns the part of rx_quota that was left unused. If a reset is
+ * pending or the channel is not enabled, nothing is processed and
+ * rx_quota is returned unchanged.
+ */
+static inline int efx_process_channel(struct efx_channel *channel, int rx_quota)
+{
+ int rxdmaqs;
+ struct efx_rx_queue *rx_queue;
+
+ if (unlikely(channel->efx->reset_pending != RESET_TYPE_NONE ||
+ !channel->enabled))
+ return rx_quota;
+
+ /* rx_quota is updated in place; the return value is consumed below
+ * as a bitmask of RX queues to refill */
+ rxdmaqs = falcon_process_eventq(channel, &rx_quota);
+
+ /* Deliver last RX packet. */
+ if (channel->rx_pkt) {
+ __efx_rx_packet(channel, channel->rx_pkt,
+ channel->rx_pkt_csummed);
+ channel->rx_pkt = NULL;
+ }
+
+ efx_flush_lro(channel);
+ efx_rx_strategy(channel);
+
+ /* Refill descriptor rings as necessary: bit N of rxdmaqs selects
+ * efx->rx_queue[N] */
+ rx_queue = &channel->efx->rx_queue[0];
+ while (rxdmaqs) {
+ if (rxdmaqs & 0x01)
+ efx_fast_push_rx_descriptors(rx_queue);
+ rx_queue++;
+ rxdmaqs >>= 1;
+ }
+
+ return rx_quota;
+}
+
+/* Mark channel as finished processing
+ *
+ * Clears channel->work_pending and then acknowledges the event queue
+ * with falcon_eventq_read_ack().
+ *
+ * Note that since we will not receive further interrupts for this
+ * channel before we finish processing and call falcon_eventq_read_ack(),
+ * there is no need to use the interrupt hold-off timers.
+ */
+static inline void efx_channel_processed(struct efx_channel *channel)
+{
+ /* Write to EVQ_RPTR_REG. If a new event arrived in a race
+ * with finishing processing, a new interrupt will be raised.
+ */
+ channel->work_pending = 0;
+ smp_wmb(); /* Ensure channel updated before any new interrupt. */
+ falcon_eventq_read_ack(channel);
+}
+
+/* NAPI poll handler
+ *
+ * Processes up to @budget RX packets on the channel that embeds @napi and
+ * returns how many were handled. When the budget was not used up, the
+ * poll is completed and the event queue acknowledged.
+ *
+ * NAPI guarantees serialisation of polls of the same device, which
+ * provides the guarantee required by efx_process_channel().
+ */
+static int efx_poll(struct napi_struct *napi, int budget)
+{
+ struct efx_channel *channel;
+ struct net_device *napi_dev;
+ int quota_left;
+ int handled;
+
+ channel = container_of(napi, struct efx_channel, napi_str);
+ napi_dev = channel->napi_dev;
+
+ EFX_TRACE(channel->efx, "channel %d NAPI poll executing on CPU %d\n",
+ channel->channel, raw_smp_processor_id());
+
+ quota_left = efx_process_channel(channel, budget);
+ handled = budget - quota_left;
+
+ /* Budget exhausted: stay scheduled and let NAPI poll us again */
+ if (handled >= budget)
+ return handled;
+
+ /* There is no race here; although napi_disable() will
+ * only wait for netif_rx_complete(), this isn't a problem
+ * since efx_channel_processed() will have no effect if
+ * interrupts have already been disabled.
+ */
+ netif_rx_complete(napi_dev, napi);
+ efx_channel_processed(channel);
+
+ return handled;
+}
+
+/* Process the eventq of the specified channel immediately on this CPU
+ *
+ * Disable hardware generated interrupts, wait for any existing
+ * processing to finish, then directly poll (and ack) the eventq.
+ * Finally reenable NAPI and interrupts.
+ *
+ * Interrupts are disabled and re-enabled for the whole NIC, not just
+ * for this channel. It is a BUG to call this on a channel that is
+ * unused or not enabled.
+ *
+ * Since we are touching interrupts the caller should hold the suspend lock
+ */
+void efx_process_channel_now(struct efx_channel *channel)
+{
+ struct efx_nic *efx = channel->efx;
+
+ BUG_ON(!channel->used_flags);
+ BUG_ON(!channel->enabled);
+
+ /* Disable interrupts and wait for ISRs to complete */
+ falcon_disable_interrupts(efx);
+ if (efx->legacy_irq)
+ synchronize_irq(efx->legacy_irq);
+ if (channel->has_interrupt && channel->irq)
+ synchronize_irq(channel->irq);
+
+ /* Wait for any NAPI processing to complete */
+ napi_disable(&channel->napi_str);
+
+ /* Poll the channel, using the event queue size as the RX quota */
+ (void) efx_process_channel(channel, efx->type->evq_size);
+
+ /* Ack the eventq. This may cause an interrupt to be generated
+ * when they are reenabled */
+ efx_channel_processed(channel);
+
+ napi_enable(&channel->napi_str);
+ falcon_enable_interrupts(efx);
+}
+
+/* Create event queue
+ * Event queue memory allocations are done only once. If the channel
+ * is reset, the memory buffer will be reused; this guards against
+ * errors during channel reset and also simplifies interrupt handling.
+ *
+ * Logs the request and returns the result of falcon_probe_eventq().
+ */
+static int efx_probe_eventq(struct efx_channel *channel)
+{
+ EFX_LOG(channel->efx, "chan %d create event queue\n", channel->channel);
+
+ return falcon_probe_eventq(channel);
+}
+
+/* Prepare channel's event queue
+ *
+ * Resets the software read pointer to the start of the queue, then
+ * returns the result of falcon_init_eventq().
+ */
+static int efx_init_eventq(struct efx_channel *channel)
+{
+ EFX_LOG(channel->efx, "chan %d init event queue\n", channel->channel);
+
+ channel->eventq_read_ptr = 0;
+
+ return falcon_init_eventq(channel);
+}
+
+/* Shut down channel's event queue: counterpart of efx_init_eventq().
+ * Logs the request and hands over to falcon_fini_eventq(). */
+static void efx_fini_eventq(struct efx_channel *channel)
+{
+ EFX_LOG(channel->efx, "chan %d fini event queue\n", channel->channel);
+
+ falcon_fini_eventq(channel);
+}
+
+/* Destroy channel's event queue: counterpart of efx_probe_eventq().
+ * Logs the request and hands over to falcon_remove_eventq(). */
+static void efx_remove_eventq(struct efx_channel *channel)
+{
+ EFX_LOG(channel->efx, "chan %d remove event queue\n", channel->channel);
+
+ falcon_remove_eventq(channel);
+}
+
+/**************************************************************************
+ *
+ * Channel handling
+ *
+ *************************************************************************/
+
+/* Setup per-NIC RX buffer parameters.
+ *
+ * Derives efx->rx_buffer_len from the current MTU plus the IP alignment
+ * and the NIC type's RX buffer padding, and sets efx->rx_buffer_order to
+ * the smallest page order whose size is at least that length.
+ */
+static void efx_calc_rx_buffer_params(struct efx_nic *efx)
+{
+ unsigned int len;
+ unsigned int order = 0;
+
+ len = (max(EFX_PAGE_IP_ALIGN, NET_IP_ALIGN) +
+ EFX_MAX_FRAME_LEN(efx->net_dev->mtu) +
+ efx->type->rx_buffer_padding);
+
+ /* Smallest order such that (PAGE_SIZE << order) covers len */
+ while (((1u << order) * PAGE_SIZE) < len)
+ ++order;
+
+ efx->rx_buffer_len = len;
+ efx->rx_buffer_order = order;
+}
+
+/* Create a channel: its event queue, then each of its TX queues, then
+ * each of its RX queues. Returns 0 on success or the first error code.
+ *
+ * On failure the TX (and, for an RX failure, RX) queues of the channel
+ * are removed again, including any that were never probed. The event
+ * queue is NOT removed here.
+ * NOTE(review): presumably left to the caller's efx_remove_channel(),
+ * which also means the remove functions must tolerate repeated calls -
+ * confirm.
+ */
+static int efx_probe_channel(struct efx_channel *channel)
+{
+ struct efx_tx_queue *tx_queue;
+ struct efx_rx_queue *rx_queue;
+ int rc;
+
+ EFX_LOG(channel->efx, "creating channel %d\n", channel->channel);
+
+ rc = efx_probe_eventq(channel);
+ if (rc)
+ goto fail1;
+
+ efx_for_each_channel_tx_queue(tx_queue, channel) {
+ rc = efx_probe_tx_queue(tx_queue);
+ if (rc)
+ goto fail2;
+ }
+
+ efx_for_each_channel_rx_queue(rx_queue, channel) {
+ rc = efx_probe_rx_queue(rx_queue);
+ if (rc)
+ goto fail3;
+ }
+
+ channel->n_rx_frm_trunc = 0;
+
+ return 0;
+
+ fail3:
+ efx_for_each_channel_rx_queue(rx_queue, channel)
+ efx_remove_rx_queue(rx_queue);
+ fail2:
+ efx_for_each_channel_tx_queue(tx_queue, channel)
+ efx_remove_tx_queue(tx_queue);
+ fail1:
+ return rc;
+}
+
+
+/* Channels are shutdown and reinitialised whilst the NIC is running
+ * to propagate configuration changes (mtu, checksum offload), or
+ * to clear hardware error conditions
+ *
+ * Recomputes the RX buffer parameters, then for every channel
+ * initialises the event queue, the TX queues and the RX queues in that
+ * order. Returns 0 on success. On the first failure all channels are
+ * shut down again with efx_fini_channels() and the error is returned.
+ */
+static int efx_init_channels(struct efx_nic *efx)
+{
+ struct efx_tx_queue *tx_queue;
+ struct efx_rx_queue *rx_queue;
+ struct efx_channel *channel;
+ int rc = 0;
+
+ efx_calc_rx_buffer_params(efx);
+
+ /* Initialise the channels */
+ efx_for_each_channel(channel, efx) {
+ EFX_LOG(channel->efx, "init chan %d\n", channel->channel);
+
+ rc = efx_init_eventq(channel);
+ if (rc)
+ goto err;
+
+ efx_for_each_channel_tx_queue(tx_queue, channel) {
+ rc = efx_init_tx_queue(tx_queue);
+ if (rc)
+ goto err;
+ }
+
+ /* The rx buffer allocation strategy is MTU dependent */
+ efx_rx_strategy(channel);
+
+ efx_for_each_channel_rx_queue(rx_queue, channel) {
+ rc = efx_init_rx_queue(rx_queue);
+ if (rc)
+ goto err;
+ }
+
+ /* No partially received packet may be left over from before */
+ WARN_ON(channel->rx_pkt != NULL);
+ efx_rx_strategy(channel);
+ }
+
+ return 0;
+
+ err:
+ /* channel still points at the channel that failed */
+ EFX_ERR(efx, "failed to initialise channel %d\n",
+ channel ? channel->channel : -1);
+ efx_fini_channels(efx);
+ return rc;
+}
+
+/* This enables event queue processing and packet transmission.
+ *
+ * The NAPI context is only registered while the net device is not
+ * flagged IFF_UP; NAPI is then enabled and every RX queue of the
+ * channel is loaded with descriptors.
+ *
+ * Note that this function is not allowed to fail, since that would
+ * introduce too much complexity into the suspend/resume path.
+ */
+static void efx_start_channel(struct efx_channel *channel)
+{
+ struct efx_rx_queue *rx_queue;
+
+ EFX_LOG(channel->efx, "starting chan %d\n", channel->channel);
+
+ if (!(channel->efx->net_dev->flags & IFF_UP))
+ netif_napi_add(channel->napi_dev, &channel->napi_str,
+ efx_poll, napi_weight);
+
+ channel->work_pending = 0;
+ channel->enabled = 1;
+ smp_wmb(); /* ensure channel updated before first interrupt */
+
+ napi_enable(&channel->napi_str);
+
+ /* Load up RX descriptors */
+ efx_for_each_channel_rx_queue(rx_queue, channel)
+ efx_fast_push_rx_descriptors(rx_queue);
+}
+
+/* This disables event queue processing and packet transmission.
+ * This function does not guarantee that all queue processing
+ * (e.g. RX refill) is complete.
+ *
+ * Does nothing if the channel is already disabled.
+ */
+static void efx_stop_channel(struct efx_channel *channel)
+{
+ struct efx_rx_queue *rx_queue;
+
+ if (!channel->enabled)
+ return;
+
+ EFX_LOG(channel->efx, "stop chan %d\n", channel->channel);
+
+ channel->enabled = 0;
+ napi_disable(&channel->napi_str);
+
+ /* Ensure that any worker threads have exited or will be no-ops.
+ * Taking and dropping add_lock waits for any current holder. */
+ efx_for_each_channel_rx_queue(rx_queue, channel) {
+ spin_lock_bh(&rx_queue->add_lock);
+ spin_unlock_bh(&rx_queue->add_lock);
+ }
+}
+
+/* Shut down the RX and TX queues of every channel, then every event
+ * queue: counterpart of efx_init_channels(). The port must already be
+ * disabled (BUG otherwise). */
+static void efx_fini_channels(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+ struct efx_tx_queue *tx_queue;
+ struct efx_rx_queue *rx_queue;
+
+ EFX_ASSERT_RESET_SERIALISED(efx);
+ BUG_ON(efx->port_enabled);
+
+ efx_for_each_channel(channel, efx) {
+ EFX_LOG(channel->efx, "shut down chan %d\n", channel->channel);
+
+ efx_for_each_channel_rx_queue(rx_queue, channel)
+ efx_fini_rx_queue(rx_queue);
+ efx_for_each_channel_tx_queue(tx_queue, channel)
+ efx_fini_tx_queue(tx_queue);
+ }
+
+ /* Do the event queues last so that we can handle flush events
+ * for all DMA queues. */
+ efx_for_each_channel(channel, efx) {
+ EFX_LOG(channel->efx, "shut down evq %d\n", channel->channel);
+
+ efx_fini_eventq(channel);
+ }
+}
+
+/* Destroy a channel: remove its RX queues, its TX queues and finally its
+ * event queue, then clear used_flags to mark the channel as unused. */
+static void efx_remove_channel(struct efx_channel *channel)
+{
+ struct efx_tx_queue *tx_queue;
+ struct efx_rx_queue *rx_queue;
+
+ EFX_LOG(channel->efx, "destroy chan %d\n", channel->channel);
+
+ efx_for_each_channel_rx_queue(rx_queue, channel)
+ efx_remove_rx_queue(rx_queue);
+ efx_for_each_channel_tx_queue(tx_queue, channel)
+ efx_remove_tx_queue(tx_queue);
+ efx_remove_eventq(channel);
+
+ channel->used_flags = 0;
+}
+
+/* Queue the RX queue's delayed work item on the shared refill workqueue,
+ * to run after @delay (passed straight to queue_delayed_work()). */
+void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue, int delay)
+{
+ queue_delayed_work(refill_workqueue, &rx_queue->work, delay);
+}
+
+/**************************************************************************
+ *
+ * Port handling
+ *
+ **************************************************************************/
+
+/* Bring the kernel's carrier state (netif_carrier_on/off) into line with
+ * efx->link_up and report the current link state in the kernel log.
+ * Nothing is done while the net device is not running.
+ */
+static void efx_link_status_changed(struct efx_nic *efx)
+{
+ struct mii_if_info *gmii = &efx->mii;
+ unsigned adv, lpa;
+ int speed;
+
+ /* SFC Bug 5356: A net_dev notifier is registered, so we must ensure
+ * that no events are triggered between unregister_netdev() and the
+ * driver unloading. A more general condition is that NETDEV_CHANGE
+ * can only be generated between NETDEV_UP and NETDEV_DOWN */
+ if (!netif_running(efx->net_dev))
+ return;
+
+ /* Only touch the carrier when it disagrees with the link state */
+ if (efx->link_up != (netif_carrier_ok(efx->net_dev) ? 1 : 0)) {
+ efx->n_link_state_changes++;
+
+ if (efx->link_up)
+ netif_carrier_on(efx->net_dev);
+ else
+ netif_carrier_off(efx->net_dev);
+ }
+
+ /* Status message for kernel log */
+ if (!efx->link_up) {
+ EFX_INFO(efx, "link down\n");
+ return;
+ }
+
+ if (efx->phy_type == PHY_TYPE_NONE) {
+ /* NONE here means direct XAUI from the controller, with no
+ * MDIO-attached device we can query. */
+ lpa = GM_LPA_10000 | LPA_DUPLEX;
+ adv = lpa;
+ } else {
+ adv = gmii_advertised(gmii);
+ lpa = gmii_lpa(gmii);
+ }
+
+ /* Highest speed flagged in link_options, defaulting to 10Mbps */
+ if (efx->link_options & GM_LPA_10000)
+ speed = 10000;
+ else if (efx->link_options & GM_LPA_1000)
+ speed = 1000;
+ else if (efx->link_options & GM_LPA_100)
+ speed = 100;
+ else
+ speed = 10;
+
+ EFX_INFO(efx, "link up at %dMbps %s-duplex "
+ "(adv %04x lpa %04x) (MTU %d)%s\n",
+ speed,
+ (efx->link_options & GM_LPA_DUPLEX ? "full" : "half"),
+ adv, lpa,
+ efx->net_dev->mtu,
+ (efx->promiscuous ? " [PROMISC]" : ""));
+}
+
+/* This call reinitialises the MAC to pick up new PHY settings. The
+ * caller must hold the mac_lock (a warning is issued otherwise).
+ * Afterwards the kernel's carrier state is updated to match. */
+static void __efx_reconfigure_port(struct efx_nic *efx)
+{
+ WARN_ON(!mutex_is_locked(&efx->mac_lock));
+
+ EFX_LOG(efx, "reconfiguring MAC from PHY settings on CPU %d\n",
+ raw_smp_processor_id());
+
+ falcon_reconfigure_xmac(efx);
+
+ /* Inform kernel of loss/gain of carrier */
+ efx_link_status_changed(efx);
+}
+
+/* Reinitialise the MAC to pick up new PHY settings, even if the port is
+ * disabled. Takes the mac_lock around __efx_reconfigure_port(). */
+void efx_reconfigure_port(struct efx_nic *efx)
+{
+ EFX_ASSERT_RESET_SERIALISED(efx);
+
+ mutex_lock(&efx->mac_lock);
+ __efx_reconfigure_port(efx);
+ mutex_unlock(&efx->mac_lock);
+}
+
+/* Asynchronous efx_reconfigure_port work item. To speed up efx_flush_all()
+ * we don't efx_reconfigure_port() if the port is disabled. Care is taken
+ * in efx_stop_all() and efx_start_port() to prevent PHY events being lost.
+ * port_enabled is tested under the mac_lock. */
+static void efx_reconfigure_work(struct work_struct *data)
+{
+ struct efx_nic *efx = container_of(data, struct efx_nic,
+ reconfigure_work);
+
+ mutex_lock(&efx->mac_lock);
+ if (efx->port_enabled)
+ __efx_reconfigure_port(efx);
+ mutex_unlock(&efx->mac_lock);
+}
+
+/* Create the port: probe it with falcon_probe_port() and choose the MAC
+ * address for the net device.
+ *
+ * A valid efx->mac_address is copied to the net device. An invalid one
+ * is an error (-EINVAL) unless allow_bad_hwaddr is set, in which case a
+ * random address is generated instead. Returns 0 on success; on any
+ * failure the port is removed again and the error code returned.
+ */
+static int efx_probe_port(struct efx_nic *efx)
+{
+ int rc;
+
+ EFX_LOG(efx, "create port\n");
+
+ /* Connect up MAC/PHY operations table and read MAC address */
+ rc = falcon_probe_port(efx);
+ if (rc)
+ goto err;
+
+ /* Sanity check MAC address */
+ if (is_valid_ether_addr(efx->mac_address)) {
+ memcpy(efx->net_dev->dev_addr, efx->mac_address, ETH_ALEN);
+ } else {
+ DECLARE_MAC_BUF(mac);
+
+ EFX_ERR(efx, "invalid MAC address %s\n",
+ print_mac(mac, efx->mac_address));
+ if (!allow_bad_hwaddr) {
+ rc = -EINVAL;
+ goto err;
+ }
+ random_ether_addr(efx->net_dev->dev_addr);
+ EFX_INFO(efx, "using locally-generated MAC %s\n",
+ print_mac(mac, efx->net_dev->dev_addr));
+ }
+
+ return 0;
+
+ err:
+ /* NOTE(review): also reached when falcon_probe_port() itself failed;
+ * assumes falcon_remove_port() is safe on an unprobed port - confirm */
+ efx_remove_port(efx);
+ return rc;
+}
+
+/* Initialise the port hardware. Returns 0 on success, or the error from
+ * falcon_init_xmac(), in which case port_initialized is left untouched. */
+static int efx_init_port(struct efx_nic *efx)
+{
+ int rc;
+
+ EFX_LOG(efx, "init port\n");
+
+ /* Initialise the MAC and PHY */
+ rc = falcon_init_xmac(efx);
+ if (rc)
+ return rc;
+
+ /* Recorded so that efx_fini_port() knows there is work to undo */
+ efx->port_initialized = 1;
+
+ /* Reconfigure port to program MAC registers */
+ falcon_reconfigure_xmac(efx);
+
+ return 0;
+}
+
+/* Allow efx_reconfigure_port() to be scheduled, and close the window
+ * between efx_stop_port and efx_flush_all whereby a previously scheduled
+ * efx_reconfigure_port() may have been cancelled.
+ *
+ * The port must currently be disabled (BUG otherwise). It is marked
+ * enabled and reconfigured immediately, both under the mac_lock. */
+static void efx_start_port(struct efx_nic *efx)
+{
+ EFX_LOG(efx, "start port\n");
+ BUG_ON(efx->port_enabled);
+
+ mutex_lock(&efx->mac_lock);
+ efx->port_enabled = 1;
+ __efx_reconfigure_port(efx);
+ mutex_unlock(&efx->mac_lock);
+}
+
+/* Prevent efx_reconfigure_work and efx_monitor() from executing, and
+ * efx_set_multicast_list() from scheduling efx_reconfigure_work.
+ * efx_reconfigure_work can still be scheduled via NAPI processing
+ * until efx_flush_all() is called.
+ *
+ * Clears port_enabled under the mac_lock. */
+static void efx_stop_port(struct efx_nic *efx)
+{
+ EFX_LOG(efx, "stop port\n");
+
+ mutex_lock(&efx->mac_lock);
+ efx->port_enabled = 0;
+ mutex_unlock(&efx->mac_lock);
+
+ /* Serialise against efx_set_multicast_list(): taking and dropping
+ * the TX lock waits for any current holder */
+ if (NET_DEV_REGISTERED(efx)) {
+ netif_tx_lock_bh(efx->net_dev);
+ netif_tx_unlock_bh(efx->net_dev);
+ }
+}
+
+/* Shut down the port hardware: counterpart of efx_init_port(). Does
+ * nothing if the port was never initialised. Otherwise the MAC is shut
+ * down, the link is marked down and the link change is reported. */
+static void efx_fini_port(struct efx_nic *efx)
+{
+ EFX_LOG(efx, "shut down port\n");
+
+ if (!efx->port_initialized)
+ return;
+
+ falcon_fini_xmac(efx);
+ efx->port_initialized = 0;
+
+ efx->link_up = 0;
+ efx_link_status_changed(efx);
+}
+
+/* Destroy the port: counterpart of efx_probe_port(). Logs the request
+ * and hands over to falcon_remove_port(). */
+static void efx_remove_port(struct efx_nic *efx)
+{
+ EFX_LOG(efx, "destroying port\n");
+
+ falcon_remove_port(efx);
+}
+
+/**************************************************************************
+ *
+ * NIC handling
+ *
+ **************************************************************************/
+
+/* This configures the PCI device to enable I/O and DMA.
+ *
+ * Enables the device and bus mastering, finds the widest DMA mask that
+ * is accepted (starting from efx->type->max_dma_mask), then claims and
+ * maps the memory BAR. Returns 0 on success or a negative error code,
+ * in which case everything acquired here has been released again.
+ */
+static int efx_init_io(struct efx_nic *efx)
+{
+ struct pci_dev *pci_dev = efx->pci_dev;
+ dma_addr_t dma_mask = efx->type->max_dma_mask;
+ int rc;
+
+ EFX_LOG(efx, "initialising I/O\n");
+
+ rc = pci_enable_device(pci_dev);
+ if (rc) {
+ EFX_ERR(efx, "failed to enable PCI device\n");
+ goto fail1;
+ }
+
+ pci_set_master(pci_dev);
+
+ /* Set the PCI DMA mask. Try all possibilities from our
+ * genuine mask down to 32 bits, because some architectures
+ * (e.g. x86_64 with iommu_sac_force set) will allow 40 bit
+ * masks even though they reject 46 bit masks.
+ *
+ * Start from an error so that the check after the loop also fails
+ * when pci_dma_supported() rejects every candidate, in which case
+ * rc is never assigned inside the loop.
+ */
+ rc = -EIO;
+ while (dma_mask > 0x7fffffffUL) {
+ if (pci_dma_supported(pci_dev, dma_mask) &&
+ ((rc = pci_set_dma_mask(pci_dev, dma_mask)) == 0))
+ break;
+ dma_mask >>= 1;
+ }
+ if (rc) {
+ EFX_ERR(efx, "could not find a suitable DMA mask\n");
+ goto fail2;
+ }
+ EFX_LOG(efx, "using DMA mask %llx\n", (unsigned long long) dma_mask);
+ rc = pci_set_consistent_dma_mask(pci_dev, dma_mask);
+ if (rc) {
+ /* pci_set_consistent_dma_mask() is not *allowed* to
+ * fail with a mask that pci_set_dma_mask() accepted,
+ * but just in case...
+ */
+ EFX_ERR(efx, "failed to set consistent DMA mask\n");
+ goto fail2;
+ }
+
+ efx->membase_phys = pci_resource_start(efx->pci_dev,
+ efx->type->mem_bar);
+ rc = pci_request_region(pci_dev, efx->type->mem_bar, "sfc");
+ if (rc) {
+ EFX_ERR(efx, "request for memory BAR failed\n");
+ rc = -EIO;
+ goto fail3;
+ }
+ efx->membase = ioremap_nocache(efx->membase_phys,
+ efx->type->mem_map_size);
+ if (!efx->membase) {
+ EFX_ERR(efx, "could not map memory BAR %d at %lx+%x\n",
+ efx->type->mem_bar, efx->membase_phys,
+ efx->type->mem_map_size);
+ rc = -ENOMEM;
+ goto fail4;
+ }
+ EFX_LOG(efx, "memory BAR %u at %lx+%x (virtual %p)\n",
+ efx->type->mem_bar, efx->membase_phys, efx->type->mem_map_size,
+ efx->membase);
+
+ return 0;
+
+ fail4:
+ /* The BAR was claimed with pci_request_region(), so it must be
+ * given back with pci_release_region(), as efx_fini_io() does */
+ pci_release_region(efx->pci_dev, efx->type->mem_bar);
+ fail3:
+ /* Zero membase_phys so efx_fini_io() does not release the BAR again */
+ efx->membase_phys = 0UL;
+ fail2:
+ pci_disable_device(efx->pci_dev);
+ fail1:
+ return rc;
+}
+
+/* Undo efx_init_io(): unmap the memory BAR if it is mapped, release the
+ * BAR region if it is held, and disable the PCI device. membase and
+ * membase_phys are cleared as they are released. */
+static void efx_fini_io(struct efx_nic *efx)
+{
+ EFX_LOG(efx, "shutting down I/O\n");
+
+ if (efx->membase) {
+ iounmap(efx->membase);
+ efx->membase = NULL;
+ }
+
+ if (efx->membase_phys) {
+ pci_release_region(efx->pci_dev, efx->type->mem_bar);
+ efx->membase_phys = 0UL;
+ }
+
+ pci_disable_device(efx->pci_dev);
+}
+
+/* Probe the number and type of interrupts we are able to obtain.
+ *
+ * Starts from efx->interrupt_mode and falls back MSI-X -> MSI -> legacy
+ * as each attempt fails, updating efx->interrupt_mode to the mode that
+ * is finally used. Also sets efx->rss_queues: with MSI-X it is rss_cpus
+ * (or the number of online CPUs if rss_cpus is 0), capped by the number
+ * of channels and of vectors obtained; with MSI or legacy it is 1.
+ */
+static void efx_probe_interrupts(struct efx_nic *efx)
+{
+ int max_channel = efx->type->phys_addr_channels - 1;
+ struct msix_entry xentries[EFX_MAX_CHANNELS];
+ int rc, i;
+
+ if (efx->interrupt_mode == EFX_INT_MODE_MSIX) {
+ BUG_ON(!pci_find_capability(efx->pci_dev, PCI_CAP_ID_MSIX));
+
+ efx->rss_queues = rss_cpus ? rss_cpus : num_online_cpus();
+ efx->rss_queues = min(efx->rss_queues, max_channel + 1);
+ efx->rss_queues = min(efx->rss_queues, EFX_MAX_CHANNELS);
+
+ /* Request the maximum number of MSI-X vectors, and fill out
+ * the channel interrupt information for the allowed allocation */
+ for (i = 0; i < efx->rss_queues; i++)
+ xentries[i].entry = i;
+ rc = pci_enable_msix(efx->pci_dev, xentries, efx->rss_queues);
+ if (rc > 0) {
+ /* A positive result is taken as the number of vectors
+ * that can be had; retry with exactly that many */
+ EFX_BUG_ON_PARANOID(rc >= efx->rss_queues);
+ efx->rss_queues = rc;
+ rc = pci_enable_msix(efx->pci_dev, xentries,
+ efx->rss_queues);
+ }
+
+ if (rc == 0) {
+ for (i = 0; i < efx->rss_queues; i++) {
+ efx->channel[i].has_interrupt = 1;
+ efx->channel[i].irq = xentries[i].vector;
+ }
+ } else {
+ /* Fall back to single channel MSI */
+ efx->interrupt_mode = EFX_INT_MODE_MSI;
+ EFX_ERR(efx, "could not enable MSI-X\n");
+ }
+ }
+
+ /* Try single interrupt MSI */
+ if (efx->interrupt_mode == EFX_INT_MODE_MSI) {
+ efx->rss_queues = 1;
+ rc = pci_enable_msi(efx->pci_dev);
+ if (rc == 0) {
+ efx->channel[0].irq = efx->pci_dev->irq;
+ efx->channel[0].has_interrupt = 1;
+ } else {
+ EFX_ERR(efx, "could not enable MSI\n");
+ efx->interrupt_mode = EFX_INT_MODE_LEGACY;
+ }
+ }
+
+ /* Assume legacy interrupts */
+ if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) {
+ efx->rss_queues = 1;
+ /* Every channel is interruptible */
+ for (i = 0; i < EFX_MAX_CHANNELS; i++)
+ efx->channel[i].has_interrupt = 1;
+ efx->legacy_irq = efx->pci_dev->irq;
+ }
+}
+
+/* Undo efx_probe_interrupts(): forget the per-channel and legacy IRQ
+ * numbers and disable MSI and MSI-X on the PCI device. Both disable
+ * calls are made regardless of which interrupt mode was in use. */
+static void efx_remove_interrupts(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+
+ /* Remove MSI/MSI-X interrupts */
+ efx_for_each_channel_with_interrupt(channel, efx)
+ channel->irq = 0;
+ pci_disable_msi(efx->pci_dev);
+ pci_disable_msix(efx->pci_dev);
+
+ /* Remove legacy interrupt */
+ efx->legacy_irq = 0;
+}
+
+/* Select number of used resources
+ * Should be called after probe_interrupts()
+ *
+ * Marks TX queue 0 and the first efx->rss_queues RX queues as used,
+ * binds each of them to a channel and records in that channel's
+ * used_flags whether it serves TX, RX or both.
+ */
+static void efx_select_used(struct efx_nic *efx)
+{
+ struct efx_tx_queue *tx_queue;
+ struct efx_rx_queue *rx_queue;
+ int i;
+
+ /* TX queues. One per port per channel with TX capability
+ * (more than one per port won't work on Linux, due to out
+ * of order issues... but will be fine on Solaris)
+ *
+ * At the moment, we only support a single TX queue. It gets
+ * channel 1 when separate TX and RX channels are requested and
+ * MSI is not in use, and shares channel 0 otherwise.
+ */
+ tx_queue = &efx->tx_queue[0];
+ tx_queue->used = 1;
+ if ((!EFX_INT_MODE_USE_MSI(efx)) && separate_tx_and_rx_channels)
+ tx_queue->channel = &efx->channel[1];
+ else
+ tx_queue->channel = &efx->channel[0];
+ tx_queue->channel->used_flags |= EFX_USED_BY_TX;
+
+ /* RX queues. Each has a dedicated channel. */
+ for (i = 0; i < EFX_MAX_RX_QUEUES; i++) {
+ rx_queue = &efx->rx_queue[i];
+
+ if (i < efx->rss_queues) {
+ rx_queue->used = 1;
+ /* If we allow multiple RX queues per channel
+ * we need to decide that here
+ */
+ rx_queue->channel = &efx->channel[rx_queue->queue];
+ rx_queue->channel->used_flags |= EFX_USED_BY_RX;
+ }
+ }
+}
+
+/* Create the NIC: hardware-type specific probing, interrupt probing,
+ * queue/channel selection and default interrupt moderation. Returns 0
+ * on success or the error from falcon_probe_nic(); the later steps
+ * cannot fail. */
+static int efx_probe_nic(struct efx_nic *efx)
+{
+ int rc;
+
+ EFX_LOG(efx, "creating NIC\n");
+
+ /* Carry out hardware-type specific initialisation */
+ rc = falcon_probe_nic(efx);
+ if (rc)
+ return rc;
+
+ /* Determine the number of channels and RX queues by trying to hook
+ * in MSI-X interrupts. */
+ efx_probe_interrupts(efx);
+
+ /* Determine number of RX queues and TX queues */
+ efx_select_used(efx);
+
+ /* Initialise the interrupt moderation settings */
+ efx_init_irq_moderation(efx, tx_irq_mod_usec, rx_irq_mod_usec);
+
+ return 0;
+}
+
+/* Destroy the NIC: counterpart of efx_probe_nic(). Interrupts are
+ * removed before the hardware-type specific state. */
+static void efx_remove_nic(struct efx_nic *efx)
+{
+ EFX_LOG(efx, "destroying NIC\n");
+
+ efx_remove_interrupts(efx);
+ falcon_remove_nic(efx);
+}
+
+/**************************************************************************
+ *
+ * NIC startup/shutdown
+ *
+ *************************************************************************/
+
+/* Create the NIC, then the port, then every channel. Returns 0 on
+ * success. On failure everything created so far is removed again and
+ * the error code is returned. */
+static int efx_probe_all(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+ int rc;
+
+ /* Create NIC */
+ rc = efx_probe_nic(efx);
+ if (rc) {
+ EFX_ERR(efx, "failed to create NIC\n");
+ goto fail1;
+ }
+
+ /* Create port */
+ rc = efx_probe_port(efx);
+ if (rc) {
+ EFX_ERR(efx, "failed to create port\n");
+ goto fail2;
+ }
+
+ /* Create channels */
+ efx_for_each_channel(channel, efx) {
+ rc = efx_probe_channel(channel);
+ if (rc) {
+ EFX_ERR(efx, "failed to create channel %d\n",
+ channel->channel);
+ goto fail3;
+ }
+ }
+
+ return 0;
+
+ fail3:
+ /* Every channel is removed, not only those probed successfully */
+ efx_for_each_channel(channel, efx)
+ efx_remove_channel(channel);
+ efx_remove_port(efx);
+ fail2:
+ efx_remove_nic(efx);
+ fail1:
+ return rc;
+}
+
+/* Called after previous invocation(s) of efx_stop_all, restarts the
+ * port, kernel transmit queue, NAPI processing and hardware interrupts,
+ * and ensures that the port is scheduled to be reconfigured.
+ * This function is safe to call multiple times when the NIC is in any
+ * state: it returns without doing anything if the port is already
+ * enabled, if the NIC is in neither STATE_RUNNING nor STATE_INIT, or if
+ * the net device is registered but not running. */
+static void efx_start_all(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+
+ EFX_ASSERT_RESET_SERIALISED(efx);
+
+ /* Check that it is appropriate to restart the interface. All
+ * of these flags are safe to read under just the rtnl lock */
+ if (efx->port_enabled)
+ return;
+ if ((efx->state != STATE_RUNNING) && (efx->state != STATE_INIT))
+ return;
+ if (NET_DEV_REGISTERED(efx) && !netif_running(efx->net_dev))
+ return;
+
+ /* Mark the port as enabled so port reconfigurations can start, then
+ * restart the transmit interface early so the watchdog timer stops */
+ efx_start_port(efx);
+ efx_wake_queue(efx);
+
+ efx_for_each_channel(channel, efx)
+ efx_start_channel(channel);
+
+ falcon_enable_interrupts(efx);
+
+ /* Start hardware monitor if we're in RUNNING */
+ if (efx->state == STATE_RUNNING)
+ queue_delayed_work(efx->workqueue, &efx->monitor_work,
+ efx_monitor_interval);
+}
+
+/* Flush all delayed work. Should only be called when no more delayed work
+ * will be scheduled. This doesn't flush pending online resets (efx_reset),
+ * since we're holding the rtnl_lock at this point.
+ *
+ * Covers the hardware monitor, the slow refill work of every RX queue
+ * and the port reconfiguration work. */
+static void efx_flush_all(struct efx_nic *efx)
+{
+ struct efx_rx_queue *rx_queue;
+
+ /* Make sure the hardware monitor is stopped */
+ cancel_delayed_work_sync(&efx->monitor_work);
+
+ /* Ensure that all RX slow refills are complete. */
+ efx_for_each_rx_queue(rx_queue, efx) {
+ cancel_delayed_work_sync(&rx_queue->work);
+ }
+
+ /* Stop scheduled port reconfigurations */
+ cancel_work_sync(&efx->reconfigure_work);
+
+}
+
+/* Quiesce hardware and software without bringing the link down.
+ * Safe to call multiple times, when the nic and interface is in any
+ * state: it returns immediately if the port is not enabled. The caller
+ * is guaranteed to subsequently be in a position to modify any hardware
+ * and software state they see fit without taking locks. */
+static void efx_stop_all(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+
+ EFX_ASSERT_RESET_SERIALISED(efx);
+
+ /* port_enabled can be read safely under the rtnl lock */
+ if (!efx->port_enabled)
+ return;
+
+ /* Disable interrupts and wait for ISR to complete */
+ falcon_disable_interrupts(efx);
+ if (efx->legacy_irq)
+ synchronize_irq(efx->legacy_irq);
+ efx_for_each_channel_with_interrupt(channel, efx)
+ if (channel->irq)
+ synchronize_irq(channel->irq);
+
+ /* Stop all NAPI processing and synchronous rx refills */
+ efx_for_each_channel(channel, efx)
+ efx_stop_channel(channel);
+
+ /* Stop all asynchronous port reconfigurations. Since all
+ * event processing has already been stopped, there is no
+ * window to lose phy events */
+ efx_stop_port(efx);
+
+ /* Flush reconfigure_work, refill_workqueue, monitor_work */
+ efx_flush_all(efx);
+
+ /* Isolate the MAC from the TX and RX engines, so that queue
+ * flushes will complete in a timely fashion. */
+ falcon_deconfigure_mac_wrapper(efx);
+ falcon_drain_tx_fifo(efx);
+
+ /* Stop the kernel transmit interface late, so the watchdog
+ * timer isn't ticking over the flush */
+ efx_stop_queue(efx);
+ if (NET_DEV_REGISTERED(efx)) {
+ /* Taking and dropping the TX lock waits for any current holder */
+ netif_tx_lock_bh(efx->net_dev);
+ netif_tx_unlock_bh(efx->net_dev);
+ }
+}
+
+/* Counterpart of efx_probe_all(): remove every channel, then the port,
+ * then the NIC. */
+static void efx_remove_all(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+
+ efx_for_each_channel(channel, efx)
+ efx_remove_channel(channel);
+ efx_remove_port(efx);
+ efx_remove_nic(efx);
+}
+
+/* A convenience function to safely flush all the queues.
+ *
+ * Stops everything, tears the channels down and brings them back up.
+ * Returns 0 on success. If the channels cannot be re-initialised, a
+ * RESET_TYPE_DISABLE reset is scheduled and the error is returned. */
+int efx_flush_queues(struct efx_nic *efx)
+{
+	int rc;
+
+	EFX_ASSERT_RESET_SERIALISED(efx);
+
+	efx_stop_all(efx);
+	efx_fini_channels(efx);
+
+	rc = efx_init_channels(efx);
+	if (rc == 0)
+		efx_start_all(efx);
+	else
+		efx_schedule_reset(efx, RESET_TYPE_DISABLE);
+
+	return rc;
+}
+
+/**************************************************************************
+ *
+ * Interrupt moderation
+ *
+ **************************************************************************/
+
+/* Set interrupt moderation parameters.
+ *
+ * Stores @tx_usecs on the channel of every TX queue and @rx_usecs on
+ * the channel of every RX queue. RX queues are walked last, so a
+ * channel serving both kinds of queue ends up with @rx_usecs. */
+void efx_init_irq_moderation(struct efx_nic *efx, int tx_usecs, int rx_usecs)
+{
+	struct efx_tx_queue *txq;
+	struct efx_rx_queue *rxq;
+
+	EFX_ASSERT_RESET_SERIALISED(efx);
+
+	efx_for_each_tx_queue(txq, efx) {
+		txq->channel->irq_moderation = tx_usecs;
+	}
+
+	efx_for_each_rx_queue(rxq, efx) {
+		rxq->channel->irq_moderation = rx_usecs;
+	}
+}
+
+/**************************************************************************
+ *
+ * Hardware monitor
+ *
+ **************************************************************************/
+
+/* Hardware monitor work item. Re-queues itself on efx->workqueue after
+ * efx_monitor_interval on every path. Serialised against
+ * efx_reconfigure_port via the mac_lock */
+static void efx_monitor(struct work_struct *data)
+{
+	struct efx_nic *efx = container_of(data, struct efx_nic,
+					   monitor_work.work);
+	int fault = 0;
+
+	EFX_TRACE(efx, "hardware monitor executing on CPU %d\n",
+		  raw_smp_processor_id());
+
+	/* If the mac_lock is already held then it is likely a port
+	 * reconfiguration is already in place, which will likely do
+	 * most of the work of check_hw() anyway. Skip this round. */
+	if (!mutex_trylock(&efx->mac_lock))
+		goto requeue;
+
+	if (efx->port_enabled)
+		fault = falcon_check_xmac(efx);
+	mutex_unlock(&efx->mac_lock);
+
+	if (fault && monitor_reset) {
+		EFX_ERR(efx, "hardware monitor detected a fault: "
+			"triggering reset\n");
+		efx_schedule_reset(efx, RESET_TYPE_MONITOR);
+	} else if (fault) {
+		EFX_ERR(efx, "hardware monitor detected a fault, "
+			"skipping reset\n");
+	}
+
+ requeue:
+	queue_delayed_work(efx->workqueue, &efx->monitor_work,
+			   efx_monitor_interval);
+}
+
+/**************************************************************************
+ *
+ * ioctls
+ *
+ *************************************************************************/
+
+/* Net device ioctl: hands the request to generic_mii_ioctl().
+ * Context: process, rtnl_lock() held.
+ */
+static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd)
+{
+	struct efx_nic *efx = net_dev->priv;
+	int rc;
+
+	EFX_ASSERT_RESET_SERIALISED(efx);
+
+	rc = generic_mii_ioctl(&efx->mii, if_mii(ifr), cmd, NULL);
+	return rc;
+}
+
+/**************************************************************************
+ *
+ * NAPI interface
+ *
+ **************************************************************************/
+
+/* Point every channel's NAPI device at the net device and initialise
+ * its LRO manager. On the first failure everything is undone through
+ * efx_fini_napi() and the error is returned; otherwise returns 0. */
+static int efx_init_napi(struct efx_nic *efx)
+{
+	struct efx_channel *ch;
+	int rc;
+
+	efx_for_each_channel(ch, efx) {
+		ch->napi_dev = efx->net_dev;
+		rc = efx_lro_init(&ch->lro_mgr, efx);
+		if (rc) {
+			efx_fini_napi(efx);
+			return rc;
+		}
+	}
+
+	return 0;
+}
+
+/* Shut down each channel's LRO manager and clear its NAPI device
+ * pointer. */
+static void efx_fini_napi(struct efx_nic *efx)
+{
+	struct efx_channel *ch;
+
+	efx_for_each_channel(ch, efx) {
+		efx_lro_fini(&ch->lro_mgr);
+		ch->napi_dev = NULL;
+	}
+}
+
+/**************************************************************************
+ *
+ * Kernel netpoll interface
+ *
+ *************************************************************************/
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+
+/* Netpoll hook: schedule every channel that has an interrupt.
+ *
+ * Although in the common case interrupts will be disabled, this is not
+ * guaranteed. However, all our work happens inside the NAPI callback,
+ * so no locking is required.
+ */
+static void efx_netpoll(struct net_device *net_dev)
+{
+	struct efx_nic *efx = net_dev->priv;
+	struct efx_channel *ch;
+
+	efx_for_each_channel_with_interrupt(ch, efx) {
+		efx_schedule_channel(ch);
+	}
+}
+
+#endif
+
+/**************************************************************************
+ *
+ * Kernel net device interface
+ *
+ *************************************************************************/
+
+/* Open the interface: log the request and start everything.
+ * Always returns 0.
+ * Context: process, rtnl_lock() held. */
+static int efx_net_open(struct net_device *net_dev)
+{
+	struct efx_nic *efx = net_dev->priv;
+
+	EFX_ASSERT_RESET_SERIALISED(efx);
+
+	EFX_LOG(efx, "opening device %s on CPU %d\n", net_dev->name,
+		raw_smp_processor_id());
+	efx_start_all(efx);
+
+	return 0;
+}
+
+/* Close the interface.
+ * Context: process, rtnl_lock() held.
+ * Note that the kernel will ignore our return code; this method
+ * should really be a void. It therefore always returns 0, and a
+ * failure to re-initialise the channels only schedules a
+ * RESET_TYPE_DISABLE reset.
+ */
+static int efx_net_stop(struct net_device *net_dev)
+{
+	struct efx_nic *efx = net_dev->priv;
+
+	EFX_LOG(efx, "closing %s on CPU %d\n", net_dev->name,
+		raw_smp_processor_id());
+
+	/* Stop the device and flush all the channels */
+	efx_stop_all(efx);
+	efx_fini_channels(efx);
+
+	if (efx_init_channels(efx) != 0)
+		efx_schedule_reset(efx, RESET_TYPE_DISABLE);
+
+	return 0;
+}
+
+/* Fill in and return net_dev->stats from the driver's MAC statistics.
+ * If the stats lock is contended, the existing figures are returned
+ * unchanged. The hardware counters are only refreshed while the NIC
+ * is in STATE_RUNNING.
+ * Context: process, dev_base_lock held, non-blocking. */
+static struct net_device_stats *efx_net_stats(struct net_device *net_dev)
+{
+	struct efx_nic *efx = net_dev->priv;
+	struct efx_mac_stats *mac = &efx->mac_stats;
+	struct net_device_stats *out = &net_dev->stats;
+
+	if (!spin_trylock(&efx->stats_lock))
+		return out;
+	if (efx->state == STATE_RUNNING) {
+		falcon_update_stats_xmac(efx);
+		falcon_update_nic_stats(efx);
+	}
+	spin_unlock(&efx->stats_lock);
+
+	/* Traffic counters */
+	out->rx_packets = mac->rx_packets;
+	out->tx_packets = mac->tx_packets;
+	out->rx_bytes = mac->rx_bytes;
+	out->tx_bytes = mac->tx_bytes;
+	out->multicast = mac->rx_multicast;
+	out->collisions = mac->tx_collision;
+
+	/* Individual error counters */
+	out->rx_length_errors = (mac->rx_gtjumbo +
+				 mac->rx_length_error);
+	out->rx_over_errors = efx->n_rx_nodesc_drop_cnt;
+	out->rx_crc_errors = mac->rx_bad;
+	out->rx_frame_errors = mac->rx_align_error;
+	out->rx_fifo_errors = mac->rx_overflow;
+	out->rx_missed_errors = mac->rx_missed;
+	out->tx_window_errors = mac->tx_late_collision;
+
+	/* Error totals */
+	out->rx_errors = (out->rx_length_errors +
+			  out->rx_over_errors +
+			  out->rx_crc_errors +
+			  out->rx_frame_errors +
+			  out->rx_fifo_errors +
+			  out->rx_missed_errors +
+			  mac->rx_symbol_error);
+	out->tx_errors = (out->tx_window_errors +
+			  mac->tx_bad);
+
+	return out;
+}
+
+/* TX timeout handler: report the stall and, if the monitor_reset
+ * option is set, schedule a RESET_TYPE_MONITOR reset.
+ * Context: netif_tx_lock held, BHs disabled. */
+static void efx_watchdog(struct net_device *net_dev)
+{
+	struct efx_nic *efx = net_dev->priv;
+	const char *action;
+
+	action = monitor_reset ? "resetting channels" : "skipping reset";
+	EFX_ERR(efx, "TX stuck with stop_count=%d port_enabled=%d: %s\n",
+		atomic_read(&efx->netif_stop_count), efx->port_enabled,
+		action);
+
+	if (!monitor_reset)
+		return;
+
+	efx_schedule_reset(efx, RESET_TYPE_MONITOR);
+}
+
+
+/* Change the MTU by stopping the interface, rebuilding the channels
+ * with the new size and restarting.
+ *
+ * Returns 0 on success and -EINVAL if @new_mtu is out of range. If
+ * the channels cannot be re-initialised, a RESET_TYPE_DISABLE reset is
+ * scheduled and the error from efx_init_channels() is returned.
+ *
+ * Context: process, rtnl_lock() held. */
+static int efx_change_mtu(struct net_device *net_dev, int new_mtu)
+{
+	struct efx_nic *efx = net_dev->priv;
+	int rc = 0;
+
+	EFX_ASSERT_RESET_SERIALISED(efx);
+
+	/* Reject undersized values as well as oversized ones; 68 is the
+	 * floor eth_change_mtu() applies to Ethernet devices. */
+	if (new_mtu < 68 || new_mtu > EFX_MAX_MTU)
+		return -EINVAL;
+
+	efx_stop_all(efx);
+
+	EFX_LOG(efx, "changing MTU to %d\n", new_mtu);
+
+	efx_fini_channels(efx);
+	net_dev->mtu = new_mtu;
+	rc = efx_init_channels(efx);
+	if (rc)
+		goto fail;
+
+	efx_start_all(efx);
+	return rc;
+
+ fail:
+	efx_schedule_reset(efx, RESET_TYPE_DISABLE);
+	return rc;
+}
+
+/* Set a new MAC address. @data is a struct sockaddr whose sa_data
+ * holds the address. Returns -EINVAL if it is not a valid Ethernet
+ * address; otherwise copies it to the net device, reconfigures the
+ * port and returns 0. */
+static int efx_set_mac_address(struct net_device *net_dev, void *data)
+{
+	struct efx_nic *efx = net_dev->priv;
+	struct sockaddr *sa = data;
+	char *requested = sa->sa_data;
+
+	EFX_ASSERT_RESET_SERIALISED(efx);
+
+	if (is_valid_ether_addr(requested)) {
+		memcpy(net_dev->dev_addr, requested, net_dev->addr_len);
+
+		/* Reconfigure the MAC */
+		efx_reconfigure_port(efx);
+
+		return 0;
+	} else {
+		DECLARE_MAC_BUF(mac);
+		EFX_ERR(efx, "invalid ethernet MAC address requested: %s\n",
+			print_mac(mac, requested));
+		return -EINVAL;
+	}
+}
+
+/* Update the promiscuity flag and rebuild the multicast hash table
+ * from the net device's flags and multicast list.
+ * Context: netif_tx_lock held, BHs disabled. */
+static void efx_set_multicast_list(struct net_device *net_dev)
+{
+	struct efx_nic *efx = net_dev->priv;
+	union efx_multicast_hash *hash = &efx->multicast_hash;
+	struct dev_mc_list *entry;
+	int promisc = (net_dev->flags & IFF_PROMISC) ? 1 : 0;
+	int n;
+
+	/* Set per-MAC promiscuity flag and reconfigure MAC if necessary */
+	if (efx->promiscuous != promisc) {
+		efx->promiscuous = promisc;
+		/* Close the window between efx_stop_port() and efx_flush_all()
+		 * by only queuing work when the port is enabled. */
+		if (efx->port_enabled)
+			queue_work(efx->workqueue, &efx->reconfigure_work);
+	}
+
+	/* Build multicast hash table */
+	if (promisc || (net_dev->flags & IFF_ALLMULTI)) {
+		/* Accept everything */
+		memset(hash, 0xff, sizeof(*hash));
+	} else {
+		memset(hash, 0x00, sizeof(*hash));
+		entry = net_dev->mc_list;
+		for (n = 0; n < net_dev->mc_count; n++, entry = entry->next) {
+			u32 crc = ether_crc_le(ETH_ALEN, entry->dmi_addr);
+			int bit = crc & (EFX_MCAST_HASH_ENTRIES - 1);
+
+			set_bit_le(bit, hash->byte);
+		}
+	}
+
+	/* Create and activate new global multicast hash table */
+	falcon_set_multicast_hash(efx);
+}
+
+/* Netdevice notifier callback. When one of our own devices (identified
+ * by its open method) is renamed, copy the new name into efx->name.
+ * Always returns NOTIFY_DONE. */
+static int efx_netdev_event(struct notifier_block *this,
+			    unsigned long event, void *ptr)
+{
+	struct net_device *net_dev = (struct net_device *)ptr;
+
+	if (net_dev->open == efx_net_open && event == NETDEV_CHANGENAME) {
+		struct efx_nic *efx = net_dev->priv;
+
+		/* Bounded copy, as everywhere else efx->name is written */
+		strlcpy(efx->name, net_dev->name, sizeof(efx->name));
+	}
+
+	return NOTIFY_DONE;
+}
+
+/* Notifier block that routes netdevice events to efx_netdev_event() */
+static struct notifier_block efx_netdev_notifier = {
+ .notifier_call = efx_netdev_event,
+};
+
+/* Install the driver's net device methods, clear the MAC statistics
+ * and register the net device with the kernel. Returns 0 on success
+ * or the error from register_netdev(). */
+static int efx_register_netdev(struct efx_nic *efx)
+{
+	struct net_device *net_dev = efx->net_dev;
+	int rc;
+
+	net_dev->watchdog_timeo = 5 * HZ;
+	net_dev->irq = efx->pci_dev->irq;
+	net_dev->open = efx_net_open;
+	net_dev->stop = efx_net_stop;
+	net_dev->get_stats = efx_net_stats;
+	net_dev->tx_timeout = &efx_watchdog;
+	net_dev->hard_start_xmit = efx_hard_start_xmit;
+	net_dev->do_ioctl = efx_ioctl;
+	net_dev->change_mtu = efx_change_mtu;
+	net_dev->set_mac_address = efx_set_mac_address;
+	net_dev->set_multicast_list = efx_set_multicast_list;
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	net_dev->poll_controller = efx_netpoll;
+#endif
+	SET_NETDEV_DEV(net_dev, &efx->pci_dev->dev);
+	SET_ETHTOOL_OPS(net_dev, &efx_ethtool_ops);
+
+	/* Always start with carrier off; PHY events will detect the link */
+	netif_carrier_off(efx->net_dev);
+
+	/* Clear MAC statistics */
+	falcon_update_stats_xmac(efx);
+	memset(&efx->mac_stats, 0, sizeof(efx->mac_stats));
+
+	rc = register_netdev(net_dev);
+	if (rc) {
+		EFX_ERR(efx, "could not register net dev\n");
+		return rc;
+	}
+
+	/* Adopt the name the kernel assigned; bounded copy, as everywhere
+	 * else efx->name is written */
+	strlcpy(efx->name, net_dev->name, sizeof(efx->name));
+
+	return 0;
+}
+
+/* Release outstanding TX buffers and, if the net device is still
+ * registered, rename the NIC after its PCI device and unregister it.
+ * Does nothing when there is no net device. */
+static void efx_unregister_netdev(struct efx_nic *efx)
+{
+	struct efx_tx_queue *txq;
+
+	if (efx->net_dev == NULL)
+		return;
+
+	BUG_ON(efx->net_dev->priv != efx);
+
+	/* Free up any skbs still remaining. This has to happen before
+	 * we try to unregister the netdev as running their destructors
+	 * may be needed to get the device ref. count to 0. */
+	efx_for_each_tx_queue(txq, efx) {
+		efx_release_tx_buffers(txq);
+	}
+
+	if (!NET_DEV_REGISTERED(efx))
+		return;
+
+	strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name));
+	unregister_netdev(efx->net_dev);
+}
+
+/**************************************************************************
+ *
+ * Device reset and suspend
+ *
+ **************************************************************************/
+
+/* The final hardware and software finalisation before reset.
+ * Saves the current settings into @ecmd, then shuts the channels
+ * down. Returns 0, or the error from falcon_xmac_get_settings(), in
+ * which case the channels are left untouched. */
+static int efx_reset_down(struct efx_nic *efx, struct ethtool_cmd *ecmd)
+{
+	int rc;
+
+	EFX_ASSERT_RESET_SERIALISED(efx);
+
+	rc = falcon_xmac_get_settings(efx, ecmd);
+	if (rc) {
+		EFX_ERR(efx, "could not back up PHY settings\n");
+		return rc;
+	}
+
+	efx_fini_channels(efx);
+
+	return 0;
+}
+
+/* The first part of software initialisation after a hardware reset.
+ * Brings the channels back up and restores the settings saved in
+ * @ecmd; the channels are torn down again if the restore fails.
+ * This function does not handle serialisation with the kernel, it
+ * assumes the caller has done this */
+static int efx_reset_up(struct efx_nic *efx, struct ethtool_cmd *ecmd)
+{
+	int rc;
+
+	rc = efx_init_channels(efx);
+	if (rc)
+		return rc;
+
+	/* Restore MAC and PHY settings. */
+	rc = falcon_xmac_set_settings(efx, ecmd);
+	if (rc == 0)
+		return 0;
+
+	EFX_ERR(efx, "could not restore PHY settings\n");
+	efx_fini_channels(efx);
+
+	return rc;
+}
+
+/* Reset the NIC as transparently as possible. Do not reset the PHY
+ * Note that the reset may fail, in which case the card will be left
+ * in a most-probably-unusable state.
+ *
+ * This function will sleep. You cannot reset from within an atomic
+ * state; use efx_schedule_reset() instead.
+ *
+ * Grabs the rtnl_lock.
+ *
+ * Returns 0 if the reset completed, or if it was skipped because the
+ * NIC is not in STATE_RUNNING. On any failure, and always for
+ * RESET_TYPE_DISABLE, the NIC is put into STATE_DISABLED, the net
+ * device is unregistered, and a non-zero error code is returned.
+ */
+static int efx_reset(struct efx_nic *efx)
+{
+	struct ethtool_cmd ecmd;
+	enum reset_type method = efx->reset_pending;
+	int rc;
+
+	/* Serialise with kernel interfaces */
+	rtnl_lock();
+
+	/* If we're not RUNNING then don't reset. Leave the reset_pending
+	 * flag set so that efx_pci_probe_main will be retried */
+	if (efx->state != STATE_RUNNING) {
+		EFX_INFO(efx, "scheduled reset quenched. NIC not RUNNING\n");
+		goto unlock_rtnl;
+	}
+
+	efx->state = STATE_RESETTING;
+	EFX_INFO(efx, "resetting (%d)\n", method);
+
+	/* The net_dev->get_stats handler is quite slow, and will fail
+	 * if a fetch is pending over reset. Serialise against it. */
+	spin_lock(&efx->stats_lock);
+	spin_unlock(&efx->stats_lock);
+
+	efx_stop_all(efx);
+	mutex_lock(&efx->mac_lock);
+
+	rc = efx_reset_down(efx, &ecmd);
+	if (rc)
+		goto fail;
+
+	rc = falcon_reset_hw(efx, method);
+	if (rc) {
+		EFX_ERR(efx, "failed to reset hardware\n");
+		goto fail;
+	}
+
+	/* Allow resets to be rescheduled. */
+	efx->reset_pending = RESET_TYPE_NONE;
+
+	/* Reinitialise bus-mastering, which may have been turned off before
+	 * the reset was scheduled. This is still appropriate, even in the
+	 * RESET_TYPE_DISABLE since this driver generally assumes the hardware
+	 * can respond to requests. */
+	pci_set_master(efx->pci_dev);
+
+	/* Reinitialise device. This is appropriate in the RESET_TYPE_DISABLE
+	 * case so the driver can talk to external SRAM */
+	rc = falcon_init_nic(efx);
+	if (rc) {
+		EFX_ERR(efx, "failed to initialise NIC\n");
+		goto fail;
+	}
+
+	/* Leave device stopped if necessary. The NIC has just been
+	 * reinitialised above, so there is no need to do it a second
+	 * time for the driver unload sequence. */
+	if (method == RESET_TYPE_DISABLE) {
+		rc = -EIO;
+		goto fail;
+	}
+
+	rc = efx_reset_up(efx, &ecmd);
+	if (rc)
+		goto fail;
+
+	mutex_unlock(&efx->mac_lock);
+	EFX_LOG(efx, "reset complete\n");
+
+	efx->state = STATE_RUNNING;
+	efx_start_all(efx);
+
+ unlock_rtnl:
+	rtnl_unlock();
+	return 0;
+
+ fail:
+	/* Every failure is handled the same way: disable the NIC */
+	EFX_ERR(efx, "has been disabled\n");
+	efx->state = STATE_DISABLED;
+
+	mutex_unlock(&efx->mac_lock);
+	rtnl_unlock();
+	efx_unregister_netdev(efx);
+	efx_fini_port(efx);
+	return rc;
+}
+
+/* Work item wrapper around efx_reset(), so that code that cannot
+ * sleep can schedule a reset for later. The result of efx_reset()
+ * is not used.
+ */
+static void efx_reset_work(struct work_struct *data)
+{
+	struct efx_nic *efx;
+
+	efx = container_of(data, struct efx_nic, reset_work);
+	efx_reset(efx);
+}
+
+/* Queue a reset on the driver workqueue.
+ *
+ * @type may name either a reset method or a reason. Methods are used
+ * as given, the RX/TX datapath reasons map to RESET_TYPE_INVISIBLE,
+ * and anything else maps to RESET_TYPE_ALL. The request is dropped if
+ * a reset is already pending. */
+void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
+{
+	enum reset_type method;
+
+	if (efx->reset_pending != RESET_TYPE_NONE) {
+		EFX_INFO(efx, "quenching already scheduled reset\n");
+		return;
+	}
+
+	switch (type) {
+	/* Datapath problems */
+	case RESET_TYPE_RX_RECOVERY:
+	case RESET_TYPE_RX_DESC_FETCH:
+	case RESET_TYPE_TX_DESC_FETCH:
+	case RESET_TYPE_TX_SKIP:
+		method = RESET_TYPE_INVISIBLE;
+		break;
+	/* Explicit methods are used as given */
+	case RESET_TYPE_INVISIBLE:
+	case RESET_TYPE_ALL:
+	case RESET_TYPE_WORLD:
+	case RESET_TYPE_DISABLE:
+		method = type;
+		break;
+	default:
+		method = RESET_TYPE_ALL;
+		break;
+	}
+
+	if (method == type)
+		EFX_LOG(efx, "scheduling reset (%d)\n", method);
+	else
+		EFX_LOG(efx, "scheduling reset (%d:%d)\n", type, method);
+
+	efx->reset_pending = method;
+
+	queue_work(efx->workqueue, &efx->reset_work);
+}
+
+/**************************************************************************
+ *
+ * List of NICs we support
+ *
+ **************************************************************************/
+
+/* PCI device ID table. Each entry's driver_data carries a pointer to
+ * the matching struct efx_nic_type, which efx_pci_probe() casts back. */
+static struct pci_device_id efx_pci_table[] __devinitdata = {
+ {PCI_DEVICE(EFX_VENDID_SFC, FALCON_A_P_DEVID),
+ .driver_data = (unsigned long) &falcon_a_nic_type},
+ {PCI_DEVICE(EFX_VENDID_SFC, FALCON_B_P_DEVID),
+ .driver_data = (unsigned long) &falcon_b_nic_type},
+ {0} /* end of list */
+};
+
+/**************************************************************************
+ *
+ * Dummy PHY/MAC/Board operations
+ *
+ * Can be used where the MAC does not implement this operation
+ * Needed so all function pointers are valid and do not have to be tested
+ * before use
+ *
+ **************************************************************************/
+/* No-op returning success (0) */
+int efx_port_dummy_op_int(struct efx_nic *efx)
+{
+ return 0;
+}
+/* No-op with no return value */
+void efx_port_dummy_op_void(struct efx_nic *efx) {}
+/* No-op taking a blink argument; @blink is ignored */
+void efx_port_dummy_op_blink(struct efx_nic *efx, int blink) {}
+
+/* PHY operations table in which every listed method is a no-op.
+ * efx_init_struct() installs it as the initial efx->phy_op. */
+static struct efx_phy_operations efx_dummy_phy_operations = {
+ .init = efx_port_dummy_op_int,
+ .reconfigure = efx_port_dummy_op_void,
+ .check_hw = efx_port_dummy_op_int,
+ .fini = efx_port_dummy_op_void,
+ .clear_interrupt = efx_port_dummy_op_void,
+ .reset_xaui = efx_port_dummy_op_void,
+};
+
+/* Dummy board operations */
+/* No-op board method returning success (0) */
+static int efx_nic_dummy_op_int(struct efx_nic *nic)
+{
+ return 0;
+}
+
+/* Board description in which every listed method is a no-op.
+ * efx_init_struct() copies it into efx->board_info as the default. */
+static struct efx_board efx_dummy_board_info = {
+ .init = efx_nic_dummy_op_int,
+ .init_leds = efx_port_dummy_op_int,
+ .set_fault_led = efx_port_dummy_op_blink,
+};
+
+/**************************************************************************
+ *
+ * Data housekeeping
+ *
+ **************************************************************************/
+
+/* Zero a struct efx_nic and fill in its invariants, including all
+ * channel and queue sub-structures, then create the driver workqueue.
+ * Returns 0 on success or -ENOMEM if the workqueue cannot be created.
+ */
+static int efx_init_struct(struct efx_nic *efx, struct efx_nic_type *type,
+			   struct pci_dev *pci_dev, struct net_device *net_dev)
+{
+	int i;
+
+	/* Initialise common structures */
+	memset(efx, 0, sizeof(*efx));
+	spin_lock_init(&efx->biu_lock);
+	spin_lock_init(&efx->phy_lock);
+	INIT_WORK(&efx->reset_work, efx_reset_work);
+	INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor);
+	efx->pci_dev = pci_dev;
+	efx->state = STATE_INIT;
+	efx->reset_pending = RESET_TYPE_NONE;
+	strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name));
+	efx->board_info = efx_dummy_board_info;
+
+	efx->net_dev = net_dev;
+	efx->rx_checksum_enabled = 1;
+	spin_lock_init(&efx->netif_stop_lock);
+	spin_lock_init(&efx->stats_lock);
+	mutex_init(&efx->mac_lock);
+	efx->phy_op = &efx_dummy_phy_operations;
+	efx->mii.dev = net_dev;
+	INIT_WORK(&efx->reconfigure_work, efx_reconfigure_work);
+	atomic_set(&efx->netif_stop_count, 1);
+
+	/* Channels: the event queue number mirrors the channel number */
+	for (i = 0; i < EFX_MAX_CHANNELS; i++) {
+		struct efx_channel *ch = &efx->channel[i];
+
+		ch->efx = efx;
+		ch->channel = i;
+		ch->evqnum = i;
+		ch->work_pending = 0;
+	}
+
+	/* TX queues */
+	for (i = 0; i < EFX_MAX_TX_QUEUES; i++) {
+		struct efx_tx_queue *txq = &efx->tx_queue[i];
+
+		txq->efx = efx;
+		txq->queue = i;
+		txq->buffer = NULL;
+		txq->channel = &efx->channel[0]; /* for safety */
+	}
+
+	/* RX queues */
+	for (i = 0; i < EFX_MAX_RX_QUEUES; i++) {
+		struct efx_rx_queue *rxq = &efx->rx_queue[i];
+
+		rxq->efx = efx;
+		rxq->queue = i;
+		rxq->channel = &efx->channel[0]; /* for safety */
+		rxq->buffer = NULL;
+		spin_lock_init(&rxq->add_lock);
+		INIT_DELAYED_WORK(&rxq->work, efx_rx_work);
+	}
+
+	efx->type = type;
+
+	/* Sanity-check NIC type */
+	EFX_BUG_ON_PARANOID(efx->type->txd_ring_mask &
+			    (efx->type->txd_ring_mask + 1));
+	EFX_BUG_ON_PARANOID(efx->type->rxd_ring_mask &
+			    (efx->type->rxd_ring_mask + 1));
+	EFX_BUG_ON_PARANOID(efx->type->evq_size &
+			    (efx->type->evq_size - 1));
+	/* As close as we can get to guaranteeing that we don't overflow */
+	EFX_BUG_ON_PARANOID(efx->type->evq_size <
+			    (efx->type->txd_ring_mask + 1 +
+			     efx->type->rxd_ring_mask + 1));
+	EFX_BUG_ON_PARANOID(efx->type->phys_addr_channels > EFX_MAX_CHANNELS);
+
+	/* Higher numbered interrupt modes are less capable! */
+	efx->interrupt_mode = max(efx->type->max_interrupt_mode,
+				  interrupt_mode);
+
+	efx->workqueue = create_singlethread_workqueue("sfc_work");
+	if (efx->workqueue == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/* Destroy the driver workqueue, if there is one, and clear the
+ * pointer. */
+static void efx_fini_struct(struct efx_nic *efx)
+{
+	if (!efx->workqueue)
+		return;
+
+	destroy_workqueue(efx->workqueue);
+	efx->workqueue = NULL;
+}
+
+/**************************************************************************
+ *
+ * PCI interface
+ *
+ **************************************************************************/
+
+/* Main body of final NIC shutdown code
+ * This is called only at module unload (or hotplug removal).
+ * It is also used by efx_pci_probe() to unwind after a failed
+ * efx_register_netdev().
+ */
+static void efx_pci_remove_main(struct efx_nic *efx)
+{
+ EFX_ASSERT_RESET_SERIALISED(efx);
+
+ /* Skip everything if we never obtained a valid membase */
+ if (!efx->membase)
+ return;
+
+ /* Tear down in the reverse of the order efx_pci_probe_main()
+ * finishes its setup: channels, then port, then interrupts */
+ efx_fini_channels(efx);
+ efx_fini_port(efx);
+
+ /* Shutdown the board, then the NIC and board state */
+ falcon_fini_interrupt(efx);
+
+ efx_fini_napi(efx);
+ efx_remove_all(efx);
+}
+
+/* Final NIC shutdown
+ * This is called only at module unload (or hotplug removal).
+ *
+ * Closes the interface, unregisters the net device, waits for queued
+ * work to finish, shuts the NIC down and finally frees the net device.
+ * Does nothing if no driver data is attached to @pci_dev.
+ */
+static void efx_pci_remove(struct pci_dev *pci_dev)
+{
+	struct efx_nic *efx;
+
+	efx = pci_get_drvdata(pci_dev);
+	if (!efx)
+		return;
+
+	/* Mark the NIC as fini, then stop the interface */
+	rtnl_lock();
+	efx->state = STATE_FINI;
+	dev_close(efx->net_dev);
+
+	/* Allow any queued efx_resets() to complete */
+	rtnl_unlock();
+
+	if (efx->membase == NULL)
+		goto out;
+
+	efx_unregister_netdev(efx);
+
+	/* Wait for any scheduled resets to complete. No more will be
+	 * scheduled from this point because efx_stop_all() has been
+	 * called, we are no longer registered with driverlink, and
+	 * the net_device's have been removed. */
+	flush_workqueue(efx->workqueue);
+
+	efx_pci_remove_main(efx);
+
+out:
+	efx_fini_io(efx);
+	EFX_LOG(efx, "shutdown successful\n");
+
+	pci_set_drvdata(pci_dev, NULL);
+	efx_fini_struct(efx);
+	free_netdev(efx->net_dev);
+}
+
+/* Main body of NIC initialisation
+ * This is called at module load (or hotplug insertion, theoretically).
+ *
+ * Runs the initialisation steps in order and, on failure, unwinds the
+ * steps already completed through the fail labels. Returns 0 on
+ * success or the error code of the step that failed.
+ */
+static int efx_pci_probe_main(struct efx_nic *efx)
+{
+ int rc;
+
+ /* Do start-of-day initialisation */
+ rc = efx_probe_all(efx);
+ if (rc)
+ goto fail1;
+
+ rc = efx_init_napi(efx);
+ if (rc)
+ goto fail2;
+
+ /* Initialise the board */
+ rc = efx->board_info.init(efx);
+ if (rc) {
+ EFX_ERR(efx, "failed to initialise board\n");
+ goto fail3;
+ }
+
+ rc = falcon_init_nic(efx);
+ if (rc) {
+ EFX_ERR(efx, "failed to initialise NIC\n");
+ goto fail4;
+ }
+
+ rc = efx_init_port(efx);
+ if (rc) {
+ EFX_ERR(efx, "failed to initialise port\n");
+ goto fail5;
+ }
+
+ rc = efx_init_channels(efx);
+ if (rc)
+ goto fail6;
+
+ rc = falcon_init_interrupt(efx);
+ if (rc)
+ goto fail7;
+
+ return 0;
+
+ /* Unwind in reverse order. The board, NIC and port steps have no
+ * undo of their own here, so fail3-fail5 share the NAPI cleanup */
+ fail7:
+ efx_fini_channels(efx);
+ fail6:
+ efx_fini_port(efx);
+ fail5:
+ fail4:
+ fail3:
+ efx_fini_napi(efx);
+ fail2:
+ efx_remove_all(efx);
+ fail1:
+ return rc;
+}
+
+/* NIC initialisation
+ *
+ * This is called at module load (or hotplug insertion,
+ * theoretically). It sets up PCI mappings, tests and resets the NIC,
+ * sets up and registers the network devices with the kernel and hooks
+ * the interrupt service routine. It does not prepare the device for
+ * transmission; this is left to the first time one of the network
+ * interfaces is brought up (i.e. efx_net_open).
+ *
+ * Returns 0 on success or a non-zero error code, in which case
+ * everything allocated here has been released again.
+ */
+static int __devinit efx_pci_probe(struct pci_dev *pci_dev,
+				   const struct pci_device_id *entry)
+{
+	struct efx_nic_type *type = (struct efx_nic_type *) entry->driver_data;
+	struct net_device *net_dev;
+	struct efx_nic *efx;
+	int i, rc;
+
+	/* Allocate and initialise a struct net_device and struct efx_nic */
+	net_dev = alloc_etherdev(sizeof(*efx));
+	if (!net_dev)
+		return -ENOMEM;
+	net_dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA;
+	if (lro)
+		net_dev->features |= NETIF_F_LRO;
+	efx = net_dev->priv;
+	pci_set_drvdata(pci_dev, efx);
+	rc = efx_init_struct(efx, type, pci_dev, net_dev);
+	if (rc)
+		goto fail1;
+
+	EFX_INFO(efx, "Solarflare Communications NIC detected\n");
+
+	/* Set up basic I/O (BAR mappings etc) */
+	rc = efx_init_io(efx);
+	if (rc)
+		goto fail2;
+
+	/* No serialisation is required with the reset path because
+	 * we're in STATE_INIT. */
+	for (i = 0; i < 5; i++) {
+		rc = efx_pci_probe_main(efx);
+		if (rc == 0)
+			break;
+
+		/* Serialise against efx_reset(). No more resets will be
+		 * scheduled since efx_stop_all() has been called, and we
+		 * have not and never have been registered with either
+		 * the rtnetlink or driverlink layers. */
+		cancel_work_sync(&efx->reset_work);
+
+		/* Retry if a recoverable reset event has been scheduled */
+		if ((efx->reset_pending != RESET_TYPE_INVISIBLE) &&
+		    (efx->reset_pending != RESET_TYPE_ALL))
+			goto fail3;
+
+		efx->reset_pending = RESET_TYPE_NONE;
+	}
+
+	if (rc) {
+		EFX_ERR(efx, "Could not reset NIC\n");
+		goto fail4;
+	}
+
+	/* Switch to the running state before we expose the device to
+	 * the OS. This is to ensure that the initial gathering of
+	 * MAC stats succeeds. */
+	rtnl_lock();
+	efx->state = STATE_RUNNING;
+	rtnl_unlock();
+
+	rc = efx_register_netdev(efx);
+	if (rc)
+		goto fail5;
+
+	EFX_LOG(efx, "initialisation successful\n");
+
+	return 0;
+
+ fail5:
+	efx_pci_remove_main(efx);
+ fail4:
+ fail3:
+	efx_fini_io(efx);
+ fail2:
+	efx_fini_struct(efx);
+ fail1:
+	EFX_LOG(efx, "initialisation failed. rc=%d\n", rc);
+	/* efx lives inside net_dev, so don't leave the PCI device's
+	 * driver data pointing at memory that is about to be freed */
+	pci_set_drvdata(pci_dev, NULL);
+	free_netdev(net_dev);
+	return rc;
+}
+
+/* PCI driver description: binds the devices listed in efx_pci_table
+ * to the probe and remove entry points above */
+static struct pci_driver efx_pci_driver = {
+ .name = EFX_DRIVER_NAME,
+ .id_table = efx_pci_table,
+ .probe = efx_pci_probe,
+ .remove = efx_pci_remove,
+};
+
+/**************************************************************************
+ *
+ * Kernel module interface
+ *
+ *************************************************************************/
+
+module_param(interrupt_mode, uint, 0444);
+MODULE_PARM_DESC(interrupt_mode,
+ "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)");
+
+/* Module load: register the netdevice notifier, create the refill
+ * workqueue and register the PCI driver, in that order. Anything
+ * already set up is undone if a later step fails. Returns 0 on
+ * success or the error code of the failing step. */
+static int __init efx_init_module(void)
+{
+ int rc;
+
+ printk(KERN_INFO "Solarflare NET driver v" EFX_DRIVER_VERSION "\n");
+
+ rc = register_netdevice_notifier(&efx_netdev_notifier);
+ if (rc)
+ goto err_notifier;
+
+ refill_workqueue = create_workqueue("sfc_refill");
+ if (!refill_workqueue) {
+ rc = -ENOMEM;
+ goto err_refill;
+ }
+
+ rc = pci_register_driver(&efx_pci_driver);
+ if (rc < 0)
+ goto err_pci;
+
+ return 0;
+
+ /* Unwind in reverse order of setup */
+ err_pci:
+ destroy_workqueue(refill_workqueue);
+ err_refill:
+ unregister_netdevice_notifier(&efx_netdev_notifier);
+ err_notifier:
+ return rc;
+}
+
+/* Module unload: undo efx_init_module() in reverse order. */
+static void __exit efx_exit_module(void)
+{
+	printk(KERN_INFO "Solarflare NET driver unloading\n");
+
+	/* PCI driver first, then the refill workqueue, then the notifier */
+	pci_unregister_driver(&efx_pci_driver);
+	destroy_workqueue(refill_workqueue);
+	unregister_netdevice_notifier(&efx_netdev_notifier);
+}
+
+module_init(efx_init_module);
+module_exit(efx_exit_module);
+
+MODULE_AUTHOR("Michael Brown <mbrown@fensystems.co.uk> and "
+ "Solarflare Communications");
+MODULE_DESCRIPTION("Solarflare Communications network driver");
+MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(pci, efx_pci_table);
diff --git a/drivers/net/sfc/efx.h b/drivers/net/sfc/efx.h
new file mode 100644
index 00000000000..3b2f69f4a9a
--- /dev/null
+++ b/drivers/net/sfc/efx.h
@@ -0,0 +1,67 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2008 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_EFX_H
+#define EFX_EFX_H
+
+#include "net_driver.h"
+
+/* PCI IDs */
+#define EFX_VENDID_SFC 0x1924
+#define FALCON_A_P_DEVID 0x0703
+#define FALCON_A_S_DEVID 0x6703
+#define FALCON_B_P_DEVID 0x0710
+
+/* TX */
+extern int efx_xmit(struct efx_nic *efx,
+ struct efx_tx_queue *tx_queue, struct sk_buff *skb);
+extern void efx_stop_queue(struct efx_nic *efx);
+extern void efx_wake_queue(struct efx_nic *efx);
+
+/* RX */
+extern void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index);
+extern void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
+ unsigned int len, int checksummed, int discard);
+extern void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue, int delay);
+
+/* Channels */
+extern void efx_process_channel_now(struct efx_channel *channel);
+extern int efx_flush_queues(struct efx_nic *efx);
+
+/* Ports */
+extern void efx_reconfigure_port(struct efx_nic *efx);
+
+/* Global */
+extern void efx_schedule_reset(struct efx_nic *efx, enum reset_type type);
+extern void efx_suspend(struct efx_nic *efx);
+extern void efx_resume(struct efx_nic *efx);
+extern void efx_init_irq_moderation(struct efx_nic *efx, int tx_usecs,
+ int rx_usecs);
+extern int efx_request_power(struct efx_nic *efx, int mw, const char *name);
+extern void efx_hex_dump(const u8 *, unsigned int, const char *);
+
+/* Dummy PHY ops for PHY drivers */
+extern int efx_port_dummy_op_int(struct efx_nic *efx);
+extern void efx_port_dummy_op_void(struct efx_nic *efx);
+extern void efx_port_dummy_op_blink(struct efx_nic *efx, int blink);
+
+
+extern unsigned int efx_monitor_interval;
+
+static inline void efx_schedule_channel(struct efx_channel *channel)
+{
+ EFX_TRACE(channel->efx, "channel %d scheduling NAPI poll on CPU%d\n",
+ channel->channel, raw_smp_processor_id());
+ channel->work_pending = 1;
+ /* work_pending is set before the channel is handed to NAPI */
+ netif_rx_schedule(channel->napi_dev, &channel->napi_str);
+}
+
+#endif /* EFX_EFX_H */
diff --git a/drivers/net/sfc/enum.h b/drivers/net/sfc/enum.h
new file mode 100644
index 00000000000..43663a4619d
--- /dev/null
+++ b/drivers/net/sfc/enum.h
@@ -0,0 +1,50 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2007 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_ENUM_H
+#define EFX_ENUM_H
+
+/*****************************************************************************/
+
+/**
+ * enum reset_type - reset types
+ *
+ * %RESET_TYPE_INVISIBLE, %RESET_TYPE_ALL, %RESET_TYPE_WORLD and
+ * %RESET_TYPE_DISABLE specify the method/scope of the reset. The
+ * other values specify reasons, which efx_schedule_reset() will choose
+ * a method for.
+ *
+ * @RESET_TYPE_INVISIBLE: don't reset the PHYs or interrupts
+ * @RESET_TYPE_ALL: reset everything but PCI core blocks
+ * @RESET_TYPE_WORLD: reset everything, save & restore PCI config
+ * @RESET_TYPE_DISABLE: disable NIC
+ * @RESET_TYPE_MONITOR: reset due to hardware monitor
+ * @RESET_TYPE_INT_ERROR: reset due to internal error
+ * @RESET_TYPE_RX_RECOVERY: reset to recover from RX datapath errors
+ * @RESET_TYPE_RX_DESC_FETCH: pcie error during rx descriptor fetch
+ * @RESET_TYPE_TX_DESC_FETCH: pcie error during tx descriptor fetch
+ * @RESET_TYPE_TX_SKIP: hardware completed empty tx descriptors
+ */
+enum reset_type {
+ RESET_TYPE_NONE = -1, /* no reset pending */
+ RESET_TYPE_INVISIBLE = 0,
+ RESET_TYPE_ALL = 1,
+ RESET_TYPE_WORLD = 2,
+ RESET_TYPE_DISABLE = 3,
+ RESET_TYPE_MAX_METHOD, /* methods above, reasons below */
+ RESET_TYPE_MONITOR,
+ RESET_TYPE_INT_ERROR,
+ RESET_TYPE_RX_RECOVERY,
+ RESET_TYPE_RX_DESC_FETCH,
+ RESET_TYPE_TX_DESC_FETCH,
+ RESET_TYPE_TX_SKIP,
+ RESET_TYPE_MAX, /* last entry */
+};
+
+#endif /* EFX_ENUM_H */
diff --git a/drivers/net/sfc/ethtool.c b/drivers/net/sfc/ethtool.c
new file mode 100644
index 00000000000..ad541badbd9
--- /dev/null
+++ b/drivers/net/sfc/ethtool.c
@@ -0,0 +1,460 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2008 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+#include <linux/rtnetlink.h>
+#include "net_driver.h"
+#include "efx.h"
+#include "ethtool.h"
+#include "falcon.h"
+#include "gmii.h"
+#include "mac.h"
+
+static int efx_ethtool_set_tx_csum(struct net_device *net_dev, u32 enable);
+
+struct ethtool_string {
+ char name[ETH_GSTRING_LEN];
+};
+
+struct efx_ethtool_stat {
+ const char *name;
+ enum {
+ EFX_ETHTOOL_STAT_SOURCE_mac_stats,
+ EFX_ETHTOOL_STAT_SOURCE_nic,
+ EFX_ETHTOOL_STAT_SOURCE_channel
+ } source;
+ unsigned offset;
+ u64(*get_stat) (void *field); /* Reader function */
+};
+
+/* Initialiser for a struct #efx_ethtool_stat with type-checking
+ *
+ * The .offset initialiser compares a null (field_type *) with the
+ * address of the named field inside a null struct efx_<source_name>
+ * pointer. Both arms of the conditional are the same offsetof()
+ * expression, so the comparison never affects the resulting value;
+ * it exists so that a field whose type does not match field_type
+ * (and therefore does not match get_stat_function) produces a
+ * pointer-type diagnostic at compile time.
+ */
+#define EFX_ETHTOOL_STAT(stat_name, source_name, field, field_type, \
+ get_stat_function) { \
+ .name = #stat_name, \
+ .source = EFX_ETHTOOL_STAT_SOURCE_##source_name, \
+ .offset = ((((field_type *) 0) == \
+ &((struct efx_##source_name *)0)->field) ? \
+ offsetof(struct efx_##source_name, field) : \
+ offsetof(struct efx_##source_name, field)), \
+ .get_stat = get_stat_function, \
+}
+
+static u64 efx_get_uint_stat(void *field)
+{
+ return *(unsigned int *)field;
+}
+
+static u64 efx_get_ulong_stat(void *field)
+{
+ return *(unsigned long *)field;
+}
+
+static u64 efx_get_u64_stat(void *field)
+{
+ return *(u64 *) field;
+}
+
+static u64 efx_get_atomic_stat(void *field)
+{
+ return atomic_read((atomic_t *) field);
+}
+
+#define EFX_ETHTOOL_ULONG_MAC_STAT(field) \
+ EFX_ETHTOOL_STAT(field, mac_stats, field, \
+ unsigned long, efx_get_ulong_stat)
+
+#define EFX_ETHTOOL_U64_MAC_STAT(field) \
+ EFX_ETHTOOL_STAT(field, mac_stats, field, \
+ u64, efx_get_u64_stat)
+
+#define EFX_ETHTOOL_UINT_NIC_STAT(name) \
+ EFX_ETHTOOL_STAT(name, nic, n_##name, \
+ unsigned int, efx_get_uint_stat)
+
+#define EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(field) \
+ EFX_ETHTOOL_STAT(field, nic, field, \
+ atomic_t, efx_get_atomic_stat)
+
+#define EFX_ETHTOOL_UINT_CHANNEL_STAT(field) \
+ EFX_ETHTOOL_STAT(field, channel, n_##field, \
+ unsigned int, efx_get_uint_stat)
+
+static struct efx_ethtool_stat efx_ethtool_stats[] = {
+ EFX_ETHTOOL_U64_MAC_STAT(tx_bytes),
+ EFX_ETHTOOL_U64_MAC_STAT(tx_good_bytes),
+ EFX_ETHTOOL_U64_MAC_STAT(tx_bad_bytes),
+ EFX_ETHTOOL_ULONG_MAC_STAT(tx_packets),
+ EFX_ETHTOOL_ULONG_MAC_STAT(tx_bad),
+ EFX_ETHTOOL_ULONG_MAC_STAT(tx_pause),
+ EFX_ETHTOOL_ULONG_MAC_STAT(tx_control),
+ EFX_ETHTOOL_ULONG_MAC_STAT(tx_unicast),
+ EFX_ETHTOOL_ULONG_MAC_STAT(tx_multicast),
+ EFX_ETHTOOL_ULONG_MAC_STAT(tx_broadcast),
+ EFX_ETHTOOL_ULONG_MAC_STAT(tx_lt64),
+ EFX_ETHTOOL_ULONG_MAC_STAT(tx_64),
+ EFX_ETHTOOL_ULONG_MAC_STAT(tx_65_to_127),
+ EFX_ETHTOOL_ULONG_MAC_STAT(tx_128_to_255),
+ EFX_ETHTOOL_ULONG_MAC_STAT(tx_256_to_511),
+ EFX_ETHTOOL_ULONG_MAC_STAT(tx_512_to_1023),
+ EFX_ETHTOOL_ULONG_MAC_STAT(tx_1024_to_15xx),
+ EFX_ETHTOOL_ULONG_MAC_STAT(tx_15xx_to_jumbo),
+ EFX_ETHTOOL_ULONG_MAC_STAT(tx_gtjumbo),
+ EFX_ETHTOOL_ULONG_MAC_STAT(tx_collision),
+ EFX_ETHTOOL_ULONG_MAC_STAT(tx_single_collision),
+ EFX_ETHTOOL_ULONG_MAC_STAT(tx_multiple_collision),
+ EFX_ETHTOOL_ULONG_MAC_STAT(tx_excessive_collision),
+ EFX_ETHTOOL_ULONG_MAC_STAT(tx_deferred),
+ EFX_ETHTOOL_ULONG_MAC_STAT(tx_late_collision),
+ EFX_ETHTOOL_ULONG_MAC_STAT(tx_excessive_deferred),
+ EFX_ETHTOOL_ULONG_MAC_STAT(tx_non_tcpudp),
+ EFX_ETHTOOL_ULONG_MAC_STAT(tx_mac_src_error),
+ EFX_ETHTOOL_ULONG_MAC_STAT(tx_ip_src_error),
+ EFX_ETHTOOL_U64_MAC_STAT(rx_bytes),
+ EFX_ETHTOOL_U64_MAC_STAT(rx_good_bytes),
+ EFX_ETHTOOL_U64_MAC_STAT(rx_bad_bytes),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_packets),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_good),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_bad),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_pause),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_control),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_unicast),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_multicast),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_broadcast),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_lt64),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_64),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_65_to_127),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_128_to_255),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_256_to_511),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_512_to_1023),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_1024_to_15xx),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_15xx_to_jumbo),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_gtjumbo),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_bad_lt64),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_bad_64_to_15xx),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_bad_15xx_to_jumbo),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_bad_gtjumbo),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_overflow),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_missed),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_false_carrier),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_symbol_error),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_align_error),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_length_error),
+ EFX_ETHTOOL_ULONG_MAC_STAT(rx_internal_error),
+ EFX_ETHTOOL_UINT_NIC_STAT(rx_nodesc_drop_cnt),
+ EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(rx_reset),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tobe_disc),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_ip_hdr_chksum_err),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tcp_udp_chksum_err),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc),
+};
+
+/* Number of ethtool statistics */
+#define EFX_ETHTOOL_NUM_STATS ARRAY_SIZE(efx_ethtool_stats)
+
+/**************************************************************************
+ *
+ * Ethtool operations
+ *
+ **************************************************************************
+ */
+
+/* Identify device by flashing LEDs
+ *
+ * Turns board LED blinking on, sleeps interruptibly for @seconds
+ * seconds, then turns blinking off again. Always returns 0.
+ *
+ * A duration of zero is what ethtool passes when the user gave no
+ * time, so it is treated as "until interrupted" rather than returning
+ * at once. Durations too large to express as a jiffies timeout are
+ * handled the same way, which also keeps seconds * HZ from wrapping.
+ */
+static int efx_ethtool_phys_id(struct net_device *net_dev, u32 seconds)
+{
+	struct efx_nic *efx = net_dev->priv;
+	long timeout;
+
+	if (!seconds || seconds > MAX_SCHEDULE_TIMEOUT / HZ)
+		timeout = MAX_SCHEDULE_TIMEOUT;
+	else
+		timeout = (long)seconds * HZ;
+
+	efx->board_info.blink(efx, 1);
+	schedule_timeout_interruptible(timeout);
+	efx->board_info.blink(efx, 0);
+	return 0;
+}
+
+/* This must be called with rtnl_lock held. */
+int efx_ethtool_get_settings(struct net_device *net_dev,
+ struct ethtool_cmd *ecmd)
+{
+ struct efx_nic *efx = net_dev->priv;
+ int rc;
+
+ mutex_lock(&efx->mac_lock);
+ rc = falcon_xmac_get_settings(efx, ecmd);
+ mutex_unlock(&efx->mac_lock);
+
+ return rc;
+}
+
+/* This must be called with rtnl_lock held. */
+int efx_ethtool_set_settings(struct net_device *net_dev,
+ struct ethtool_cmd *ecmd)
+{
+ struct efx_nic *efx = net_dev->priv;
+ int rc;
+
+ mutex_lock(&efx->mac_lock);
+ rc = falcon_xmac_set_settings(efx, ecmd);
+ mutex_unlock(&efx->mac_lock);
+ if (!rc)
+ efx_reconfigure_port(efx);
+
+ return rc;
+}
+
+static void efx_ethtool_get_drvinfo(struct net_device *net_dev,
+ struct ethtool_drvinfo *info)
+{
+ struct efx_nic *efx = net_dev->priv;
+
+ strlcpy(info->driver, EFX_DRIVER_NAME, sizeof(info->driver));
+ strlcpy(info->version, EFX_DRIVER_VERSION, sizeof(info->version));
+ strlcpy(info->bus_info, pci_name(efx->pci_dev), sizeof(info->bus_info));
+}
+
+static int efx_ethtool_get_stats_count(struct net_device *net_dev)
+{
+ return EFX_ETHTOOL_NUM_STATS;
+}
+
+static void efx_ethtool_get_strings(struct net_device *net_dev,
+ u32 string_set, u8 *strings)
+{
+ struct ethtool_string *ethtool_strings =
+ (struct ethtool_string *)strings;
+ int i;
+
+ if (string_set == ETH_SS_STATS)
+ for (i = 0; i < EFX_ETHTOOL_NUM_STATS; i++)
+ strncpy(ethtool_strings[i].name,
+ efx_ethtool_stats[i].name,
+ sizeof(ethtool_strings[i].name));
+}
+
+static void efx_ethtool_get_stats(struct net_device *net_dev,
+ struct ethtool_stats *stats,
+ u64 *data)
+{
+ struct efx_nic *efx = net_dev->priv;
+ struct efx_mac_stats *mac_stats = &efx->mac_stats;
+ struct efx_ethtool_stat *stat;
+ struct efx_channel *channel;
+ int i;
+
+ EFX_BUG_ON_PARANOID(stats->n_stats != EFX_ETHTOOL_NUM_STATS);
+
+ /* Update MAC and NIC statistics */
+ net_dev->get_stats(net_dev);
+
+ /* Fill detailed statistics buffer */
+ for (i = 0; i < EFX_ETHTOOL_NUM_STATS; i++) {
+ stat = &efx_ethtool_stats[i];
+ switch (stat->source) {
+ case EFX_ETHTOOL_STAT_SOURCE_mac_stats:
+ data[i] = stat->get_stat((void *)mac_stats +
+ stat->offset);
+ break;
+ case EFX_ETHTOOL_STAT_SOURCE_nic:
+ data[i] = stat->get_stat((void *)efx + stat->offset);
+ break;
+ case EFX_ETHTOOL_STAT_SOURCE_channel:
+ data[i] = 0;
+ efx_for_each_channel(channel, efx)
+ data[i] += stat->get_stat((void *)channel +
+ stat->offset);
+ break;
+ }
+ }
+}
+
+static int efx_ethtool_set_tx_csum(struct net_device *net_dev, u32 enable)
+{
+ struct efx_nic *efx = net_dev->priv;
+ int rc;
+
+ rc = ethtool_op_set_tx_csum(net_dev, enable);
+ if (rc)
+ return rc;
+
+ efx_flush_queues(efx);
+
+ return 0;
+}
+
+static int efx_ethtool_set_rx_csum(struct net_device *net_dev, u32 enable)
+{
+ struct efx_nic *efx = net_dev->priv;
+
+ /* No way to stop the hardware doing the checks; we just
+ * ignore the result.
+ */
+ efx->rx_checksum_enabled = (enable ? 1 : 0);
+
+ return 0;
+}
+
+static u32 efx_ethtool_get_rx_csum(struct net_device *net_dev)
+{
+ struct efx_nic *efx = net_dev->priv;
+
+ return efx->rx_checksum_enabled;
+}
+
+/* Restart autonegotiation */
+static int efx_ethtool_nway_reset(struct net_device *net_dev)
+{
+ struct efx_nic *efx = net_dev->priv;
+
+ return mii_nway_restart(&efx->mii);
+}
+
+static u32 efx_ethtool_get_link(struct net_device *net_dev)
+{
+ struct efx_nic *efx = net_dev->priv;
+
+ return efx->link_up;
+}
+
+static int efx_ethtool_get_coalesce(struct net_device *net_dev,
+ struct ethtool_coalesce *coalesce)
+{
+ struct efx_nic *efx = net_dev->priv;
+ struct efx_tx_queue *tx_queue;
+ struct efx_rx_queue *rx_queue;
+ struct efx_channel *channel;
+
+ memset(coalesce, 0, sizeof(*coalesce));
+
+ /* Find lowest IRQ moderation across all used TX queues */
+ coalesce->tx_coalesce_usecs_irq = ~((u32) 0);
+ efx_for_each_tx_queue(tx_queue, efx) {
+ channel = tx_queue->channel;
+ if (channel->irq_moderation < coalesce->tx_coalesce_usecs_irq) {
+ if (channel->used_flags != EFX_USED_BY_RX_TX)
+ coalesce->tx_coalesce_usecs_irq =
+ channel->irq_moderation;
+ else
+ coalesce->tx_coalesce_usecs_irq = 0;
+ }
+ }
+
+ /* Find lowest IRQ moderation across all used RX queues */
+ coalesce->rx_coalesce_usecs_irq = ~((u32) 0);
+ efx_for_each_rx_queue(rx_queue, efx) {
+ channel = rx_queue->channel;
+ if (channel->irq_moderation < coalesce->rx_coalesce_usecs_irq)
+ coalesce->rx_coalesce_usecs_irq =
+ channel->irq_moderation;
+ }
+
+ return 0;
+}
+
+/* Set coalescing parameters
+ * The difficulties occur for shared channels
+ */
+static int efx_ethtool_set_coalesce(struct net_device *net_dev,
+ struct ethtool_coalesce *coalesce)
+{
+ struct efx_nic *efx = net_dev->priv;
+ struct efx_channel *channel;
+ struct efx_tx_queue *tx_queue;
+ unsigned tx_usecs, rx_usecs;
+
+ if (coalesce->use_adaptive_rx_coalesce ||
+ coalesce->use_adaptive_tx_coalesce)
+ return -EOPNOTSUPP;
+
+ if (coalesce->rx_coalesce_usecs || coalesce->tx_coalesce_usecs) {
+ EFX_ERR(efx, "invalid coalescing setting. "
+ "Only rx/tx_coalesce_usecs_irq are supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ rx_usecs = coalesce->rx_coalesce_usecs_irq;
+ tx_usecs = coalesce->tx_coalesce_usecs_irq;
+
+ /* If the channel is shared only allow RX parameters to be set */
+ efx_for_each_tx_queue(tx_queue, efx) {
+ if ((tx_queue->channel->used_flags == EFX_USED_BY_RX_TX) &&
+ tx_usecs) {
+ EFX_ERR(efx, "Channel is shared. "
+ "Only RX coalescing may be set\n");
+ return -EOPNOTSUPP;
+ }
+ }
+
+ efx_init_irq_moderation(efx, tx_usecs, rx_usecs);
+
+ /* Reset channel to pick up new moderation value. Note that
+ * this may change the value of the irq_moderation field
+ * (e.g. to allow for hardware timer granularity).
+ */
+ efx_for_each_channel(channel, efx)
+ falcon_set_int_moderation(channel);
+
+ return 0;
+}
+
+static int efx_ethtool_set_pauseparam(struct net_device *net_dev,
+ struct ethtool_pauseparam *pause)
+{
+ struct efx_nic *efx = net_dev->priv;
+ enum efx_fc_type flow_control = efx->flow_control;
+ int rc;
+
+ flow_control &= ~(EFX_FC_RX | EFX_FC_TX | EFX_FC_AUTO);
+ flow_control |= pause->rx_pause ? EFX_FC_RX : 0;
+ flow_control |= pause->tx_pause ? EFX_FC_TX : 0;
+ flow_control |= pause->autoneg ? EFX_FC_AUTO : 0;
+
+ /* Try to push the pause parameters */
+ mutex_lock(&efx->mac_lock);
+ rc = falcon_xmac_set_pause(efx, flow_control);
+ mutex_unlock(&efx->mac_lock);
+
+ if (!rc)
+ efx_reconfigure_port(efx);
+
+ return rc;
+}
+
+static void efx_ethtool_get_pauseparam(struct net_device *net_dev,
+ struct ethtool_pauseparam *pause)
+{
+ struct efx_nic *efx = net_dev->priv;
+
+ pause->rx_pause = (efx->flow_control & EFX_FC_RX) ? 1 : 0;
+ pause->tx_pause = (efx->flow_control & EFX_FC_TX) ? 1 : 0;
+ pause->autoneg = (efx->flow_control & EFX_FC_AUTO) ? 1 : 0;
+}
+
+
+struct ethtool_ops efx_ethtool_ops = {
+ .get_settings = efx_ethtool_get_settings,
+ .set_settings = efx_ethtool_set_settings,
+ .get_drvinfo = efx_ethtool_get_drvinfo,
+ .nway_reset = efx_ethtool_nway_reset,
+ .get_link = efx_ethtool_get_link,
+ .get_coalesce = efx_ethtool_get_coalesce,
+ .set_coalesce = efx_ethtool_set_coalesce,
+ .get_pauseparam = efx_ethtool_get_pauseparam,
+ .set_pauseparam = efx_ethtool_set_pauseparam,
+ .get_rx_csum = efx_ethtool_get_rx_csum,
+ .set_rx_csum = efx_ethtool_set_rx_csum,
+ .get_tx_csum = ethtool_op_get_tx_csum,
+ .set_tx_csum = efx_ethtool_set_tx_csum,
+ .get_sg = ethtool_op_get_sg,
+ .set_sg = ethtool_op_set_sg,
+ .get_flags = ethtool_op_get_flags,
+ .set_flags = ethtool_op_set_flags,
+ .get_strings = efx_ethtool_get_strings,
+ .phys_id = efx_ethtool_phys_id,
+ .get_stats_count = efx_ethtool_get_stats_count,
+ .get_ethtool_stats = efx_ethtool_get_stats,
+};
diff --git a/drivers/net/sfc/ethtool.h b/drivers/net/sfc/ethtool.h
new file mode 100644
index 00000000000..3628e43df14
--- /dev/null
+++ b/drivers/net/sfc/ethtool.h
@@ -0,0 +1,27 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2005 Fen Systems Ltd.
+ * Copyright 2006 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_ETHTOOL_H
+#define EFX_ETHTOOL_H
+
+#include "net_driver.h"
+
+/*
+ * Ethtool support
+ */
+
+extern int efx_ethtool_get_settings(struct net_device *net_dev,
+ struct ethtool_cmd *ecmd);
+extern int efx_ethtool_set_settings(struct net_device *net_dev,
+ struct ethtool_cmd *ecmd);
+
+extern struct ethtool_ops efx_ethtool_ops;
+
+#endif /* EFX_ETHTOOL_H */
diff --git a/drivers/net/sfc/falcon.c b/drivers/net/sfc/falcon.c
new file mode 100644
index 00000000000..46db549ce58
--- /dev/null
+++ b/drivers/net/sfc/falcon.c
@@ -0,0 +1,2722 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2008 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include "net_driver.h"
+#include "bitfield.h"
+#include "efx.h"
+#include "mac.h"
+#include "gmii.h"
+#include "spi.h"
+#include "falcon.h"
+#include "falcon_hwdefs.h"
+#include "falcon_io.h"
+#include "mdio_10g.h"
+#include "phy.h"
+#include "boards.h"
+#include "workarounds.h"
+
+/* Falcon hardware control.
+ * Falcon is the internal codename for the SFC4000 controller that is
+ * present in SFE400X evaluation boards
+ */
+
+/**
+ * struct falcon_nic_data - Falcon NIC state
+ * @next_buffer_table: First available buffer table id
+ * @pci_dev2: The secondary PCI device if present
+ */
+struct falcon_nic_data {
+ unsigned next_buffer_table;
+ struct pci_dev *pci_dev2;
+};
+
+/**************************************************************************
+ *
+ * Configurable values
+ *
+ **************************************************************************
+ */
+
+static int disable_dma_stats;
+
+/* This is set to 16 for a good reason. In summary, if larger than
+ * 16, the descriptor cache holds more than a default socket
+ * buffer's worth of packets (for UDP we can only have at most one
+ * socket buffer's worth outstanding). This combined with the fact
+ * that we only get 1 TX event per descriptor cache means the NIC
+ * goes idle.
+ */
+#define TX_DC_ENTRIES 16
+#define TX_DC_ENTRIES_ORDER 0
+#define TX_DC_BASE 0x130000
+
+#define RX_DC_ENTRIES 64
+#define RX_DC_ENTRIES_ORDER 2
+#define RX_DC_BASE 0x100000
+
+/* RX FIFO XOFF watermark
+ *
+ * When the amount of the RX FIFO used increases past this
+ * watermark send XOFF. Only used if RX flow control is enabled (ethtool -A)
+ * This also has an effect on RX/TX arbitration
+ */
+static int rx_xoff_thresh_bytes = -1;
+module_param(rx_xoff_thresh_bytes, int, 0644);
+MODULE_PARM_DESC(rx_xoff_thresh_bytes, "RX fifo XOFF threshold");
+
+/* RX FIFO XON watermark
+ *
+ * When the amount of the RX FIFO used decreases below this
+ * watermark send XON. Only used if TX flow control is enabled (ethtool -A)
+ * This also has an effect on RX/TX arbitration
+ */
+static int rx_xon_thresh_bytes = -1;
+module_param(rx_xon_thresh_bytes, int, 0644);
+MODULE_PARM_DESC(rx_xon_thresh_bytes, "RX fifo XON threshold");
+
+/* TX descriptor ring size - min 512 max 4k */
+#define FALCON_TXD_RING_ORDER TX_DESCQ_SIZE_1K
+#define FALCON_TXD_RING_SIZE 1024
+#define FALCON_TXD_RING_MASK (FALCON_TXD_RING_SIZE - 1)
+
+/* RX descriptor ring size - min 512 max 4k */
+#define FALCON_RXD_RING_ORDER RX_DESCQ_SIZE_1K
+#define FALCON_RXD_RING_SIZE 1024
+#define FALCON_RXD_RING_MASK (FALCON_RXD_RING_SIZE - 1)
+
+/* Event queue size - max 32k */
+#define FALCON_EVQ_ORDER EVQ_SIZE_4K
+#define FALCON_EVQ_SIZE 4096
+#define FALCON_EVQ_MASK (FALCON_EVQ_SIZE - 1)
+
+/* Max number of internal errors. After this resets will not be performed */
+#define FALCON_MAX_INT_ERRORS 4
+
+/* Maximum period that we wait for flush events. If the flush event
+ * doesn't arrive in this period of time then we check if the queue
+ * was disabled anyway. */
+#define FALCON_FLUSH_TIMEOUT 10 /* 10ms */
+
+/**************************************************************************
+ *
+ * Falcon constants
+ *
+ **************************************************************************
+ */
+
+/* DMA address mask (up to 46-bit, avoiding compiler warnings)
+ *
+ * Note that it is possible to have a platform with 64-bit longs and
+ * 32-bit DMA addresses, or vice versa. EFX_DMA_MASK takes care of the
+ * platform DMA mask.
+ */
+#if BITS_PER_LONG == 64
+#define FALCON_DMA_MASK EFX_DMA_MASK(0x00003fffffffffffUL)
+#else
+#define FALCON_DMA_MASK EFX_DMA_MASK(0x00003fffffffffffULL)
+#endif
+
+/* TX DMA length mask (13-bit) */
+#define FALCON_TX_DMA_MASK (4096 - 1)
+
+/* Size and alignment of special buffers (4KB) */
+#define FALCON_BUF_SIZE 4096
+
+/* Dummy SRAM size code */
+#define SRM_NB_BSZ_ONCHIP_ONLY (-1)
+
+/* Be nice if these (or equiv.) were in linux/pci_regs.h, but they're not. */
+#define PCI_EXP_DEVCAP_PWR_VAL_LBN 18
+#define PCI_EXP_DEVCAP_PWR_SCL_LBN 26
+#define PCI_EXP_DEVCTL_PAYLOAD_LBN 5
+#define PCI_EXP_LNKSTA_LNK_WID 0x3f0
+#define PCI_EXP_LNKSTA_LNK_WID_LBN 4
+
+#define FALCON_IS_DUAL_FUNC(efx) \
+ (FALCON_REV(efx) < FALCON_REV_B0)
+
+/**************************************************************************
+ *
+ * Falcon hardware access
+ *
+ **************************************************************************/
+
+/* Read the current event from the event queue */
+static inline efx_qword_t *falcon_event(struct efx_channel *channel,
+ unsigned int index)
+{
+ return (((efx_qword_t *) (channel->eventq.addr)) + index);
+}
+
+/* See if an event is present
+ *
+ * We check both the high and low dword of the event for all ones. We
+ * wrote all ones when we cleared the event, and no valid event can
+ * have all ones in either its high or low dwords. This approach is
+ * robust against reordering.
+ *
+ * Note that using a single 64-bit comparison is incorrect; even
+ * though the CPU read will be atomic, the DMA write may not be.
+ */
+static inline int falcon_event_present(efx_qword_t *event)
+{
+ return (!(EFX_DWORD_IS_ALL_ONES(event->dword[0]) |
+ EFX_DWORD_IS_ALL_ONES(event->dword[1])));
+}
+
+/**************************************************************************
+ *
+ * I2C bus - this is a bit-bashing interface using GPIO pins
+ * Note that it uses the output enables to tristate the outputs
+ * SDA is the data pin and SCL is the clock
+ *
+ **************************************************************************
+ */
+static void falcon_setsdascl(struct efx_i2c_interface *i2c)
+{
+ efx_oword_t reg;
+
+ falcon_read(i2c->efx, &reg, GPIO_CTL_REG_KER);
+ EFX_SET_OWORD_FIELD(reg, GPIO0_OEN, (i2c->scl ? 0 : 1));
+ EFX_SET_OWORD_FIELD(reg, GPIO3_OEN, (i2c->sda ? 0 : 1));
+ falcon_write(i2c->efx, &reg, GPIO_CTL_REG_KER);
+}
+
+static int falcon_getsda(struct efx_i2c_interface *i2c)
+{
+ efx_oword_t reg;
+
+ falcon_read(i2c->efx, &reg, GPIO_CTL_REG_KER);
+ return EFX_OWORD_FIELD(reg, GPIO3_IN);
+}
+
+/* Read the current level of the I2C clock line (SCL, on GPIO0)
+ *
+ * GPIO_CTL_REG_KER is read into an oword, so the input bit is
+ * extracted with the oword accessor, in the same way as
+ * falcon_getsda() does for the data line.
+ */
+static int falcon_getscl(struct efx_i2c_interface *i2c)
+{
+	efx_oword_t reg;
+
+	falcon_read(i2c->efx, &reg, GPIO_CTL_REG_KER);
+	return EFX_OWORD_FIELD(reg, GPIO0_IN);
+}
+
+static struct efx_i2c_bit_operations falcon_i2c_bit_operations = {
+ .setsda = falcon_setsdascl,
+ .setscl = falcon_setsdascl,
+ .getsda = falcon_getsda,
+ .getscl = falcon_getscl,
+ .udelay = 100,
+ .mdelay = 10,
+};
+
+/**************************************************************************
+ *
+ * Falcon special buffer handling
+ * Special buffers are used for event queues and the TX and RX
+ * descriptor rings.
+ *
+ *************************************************************************/
+
+/*
+ * Initialise a Falcon special buffer
+ *
+ * This will define a buffer (previously allocated via
+ * falcon_alloc_special_buffer()) in Falcon's buffer table, allowing
+ * it to be used for event queues, descriptor rings etc.
+ */
+static int
+falcon_init_special_buffer(struct efx_nic *efx,
+ struct efx_special_buffer *buffer)
+{
+ efx_qword_t buf_desc;
+ int index;
+ dma_addr_t dma_addr;
+ int i;
+
+ EFX_BUG_ON_PARANOID(!buffer->addr);
+
+ /* Write buffer descriptors to NIC */
+ for (i = 0; i < buffer->entries; i++) {
+ index = buffer->index + i;
+ dma_addr = buffer->dma_addr + (i * 4096);
+ EFX_LOG(efx, "mapping special buffer %d at %llx\n",
+ index, (unsigned long long)dma_addr);
+ EFX_POPULATE_QWORD_4(buf_desc,
+ IP_DAT_BUF_SIZE, IP_DAT_BUF_SIZE_4K,
+ BUF_ADR_REGION, 0,
+ BUF_ADR_FBUF, (dma_addr >> 12),
+ BUF_OWNER_ID_FBUF, 0);
+ falcon_write_sram(efx, &buf_desc, index);
+ }
+
+ return 0;
+}
+
+/* Unmaps a buffer from Falcon and clears the buffer table entries */
+static void
+falcon_fini_special_buffer(struct efx_nic *efx,
+ struct efx_special_buffer *buffer)
+{
+ efx_oword_t buf_tbl_upd;
+ unsigned int start = buffer->index;
+ unsigned int end = (buffer->index + buffer->entries - 1);
+
+ if (!buffer->entries)
+ return;
+
+ EFX_LOG(efx, "unmapping special buffers %d-%d\n",
+ buffer->index, buffer->index + buffer->entries - 1);
+
+ EFX_POPULATE_OWORD_4(buf_tbl_upd,
+ BUF_UPD_CMD, 0,
+ BUF_CLR_CMD, 1,
+ BUF_CLR_END_ID, end,
+ BUF_CLR_START_ID, start);
+ falcon_write(efx, &buf_tbl_upd, BUF_TBL_UPD_REG_KER);
+}
+
+/*
+ * Allocate a new Falcon special buffer
+ *
+ * This allocates memory for a new buffer, clears it and allocates a
+ * new buffer ID range. It does not write into Falcon's buffer table.
+ *
+ * This call will allocate 4KB buffers, since Falcon can't use 8KB
+ * buffers for event queues and descriptor rings.
+ */
+static int falcon_alloc_special_buffer(struct efx_nic *efx,
+ struct efx_special_buffer *buffer,
+ unsigned int len)
+{
+ struct falcon_nic_data *nic_data = efx->nic_data;
+
+ len = ALIGN(len, FALCON_BUF_SIZE);
+
+ buffer->addr = pci_alloc_consistent(efx->pci_dev, len,
+ &buffer->dma_addr);
+ if (!buffer->addr)
+ return -ENOMEM;
+ buffer->len = len;
+ buffer->entries = len / FALCON_BUF_SIZE;
+ BUG_ON(buffer->dma_addr & (FALCON_BUF_SIZE - 1));
+
+ /* All zeros is a potentially valid event so memset to 0xff */
+ memset(buffer->addr, 0xff, len);
+
+ /* Select new buffer ID */
+ buffer->index = nic_data->next_buffer_table;
+ nic_data->next_buffer_table += buffer->entries;
+
+ EFX_LOG(efx, "allocating special buffers %d-%d at %llx+%x "
+ "(virt %p phys %lx)\n", buffer->index,
+ buffer->index + buffer->entries - 1,
+ (unsigned long long)buffer->dma_addr, len,
+ buffer->addr, virt_to_phys(buffer->addr));
+
+ return 0;
+}
+
+static void falcon_free_special_buffer(struct efx_nic *efx,
+ struct efx_special_buffer *buffer)
+{
+ if (!buffer->addr)
+ return;
+
+ EFX_LOG(efx, "deallocating special buffers %d-%d at %llx+%x "
+ "(virt %p phys %lx)\n", buffer->index,
+ buffer->index + buffer->entries - 1,
+ (unsigned long long)buffer->dma_addr, buffer->len,
+ buffer->addr, virt_to_phys(buffer->addr));
+
+ pci_free_consistent(efx->pci_dev, buffer->len, buffer->addr,
+ buffer->dma_addr);
+ buffer->addr = NULL;
+ buffer->entries = 0;
+}
+
+/**************************************************************************
+ *
+ * Falcon generic buffer handling
+ * These buffers are used for interrupt status and MAC stats
+ *
+ **************************************************************************/
+
+static int falcon_alloc_buffer(struct efx_nic *efx,
+ struct efx_buffer *buffer, unsigned int len)
+{
+ buffer->addr = pci_alloc_consistent(efx->pci_dev, len,
+ &buffer->dma_addr);
+ if (!buffer->addr)
+ return -ENOMEM;
+ buffer->len = len;
+ memset(buffer->addr, 0, len);
+ return 0;
+}
+
+static void falcon_free_buffer(struct efx_nic *efx, struct efx_buffer *buffer)
+{
+ if (buffer->addr) {
+ pci_free_consistent(efx->pci_dev, buffer->len,
+ buffer->addr, buffer->dma_addr);
+ buffer->addr = NULL;
+ }
+}
+
+/**************************************************************************
+ *
+ * Falcon TX path
+ *
+ **************************************************************************/
+
+/* Returns a pointer to the specified transmit descriptor in the TX
+ * descriptor queue belonging to the specified channel.
+ */
+static inline efx_qword_t *falcon_tx_desc(struct efx_tx_queue *tx_queue,
+ unsigned int index)
+{
+ return (((efx_qword_t *) (tx_queue->txd.addr)) + index);
+}
+
+/* This writes to the TX_DESC_WPTR; write pointer for TX descriptor ring */
+static inline void falcon_notify_tx_desc(struct efx_tx_queue *tx_queue)
+{
+ unsigned write_ptr;
+ efx_dword_t reg;
+
+ write_ptr = tx_queue->write_count & FALCON_TXD_RING_MASK;
+ EFX_POPULATE_DWORD_1(reg, TX_DESC_WPTR_DWORD, write_ptr);
+ falcon_writel_page(tx_queue->efx, &reg,
+ TX_DESC_UPD_REG_KER_DWORD, tx_queue->queue);
+}
+
+
+/* For each entry inserted into the software descriptor ring, create a
+ * descriptor in the hardware TX descriptor ring (in host memory), and
+ * write a doorbell.
+ */
+void falcon_push_buffers(struct efx_tx_queue *tx_queue)
+{
+
+ struct efx_tx_buffer *buffer;
+ efx_qword_t *txd;
+ unsigned write_ptr;
+
+ BUG_ON(tx_queue->write_count == tx_queue->insert_count);
+
+ do {
+ write_ptr = tx_queue->write_count & FALCON_TXD_RING_MASK;
+ buffer = &tx_queue->buffer[write_ptr];
+ txd = falcon_tx_desc(tx_queue, write_ptr);
+ ++tx_queue->write_count;
+
+ /* Create TX descriptor ring entry */
+ EFX_POPULATE_QWORD_5(*txd,
+ TX_KER_PORT, 0,
+ TX_KER_CONT, buffer->continuation,
+ TX_KER_BYTE_CNT, buffer->len,
+ TX_KER_BUF_REGION, 0,
+ TX_KER_BUF_ADR, buffer->dma_addr);
+ } while (tx_queue->write_count != tx_queue->insert_count);
+
+ wmb(); /* Ensure descriptors are written before they are fetched */
+ falcon_notify_tx_desc(tx_queue);
+}
+
+/* Allocate hardware resources for a TX queue */
+int falcon_probe_tx(struct efx_tx_queue *tx_queue)
+{
+ struct efx_nic *efx = tx_queue->efx;
+ return falcon_alloc_special_buffer(efx, &tx_queue->txd,
+ FALCON_TXD_RING_SIZE *
+ sizeof(efx_qword_t));
+}
+
+int falcon_init_tx(struct efx_tx_queue *tx_queue)
+{
+ efx_oword_t tx_desc_ptr;
+ struct efx_nic *efx = tx_queue->efx;
+ int rc;
+
+ /* Pin TX descriptor ring */
+ rc = falcon_init_special_buffer(efx, &tx_queue->txd);
+ if (rc)
+ return rc;
+
+ /* Push TX descriptor ring to card */
+ EFX_POPULATE_OWORD_10(tx_desc_ptr,
+ TX_DESCQ_EN, 1,
+ TX_ISCSI_DDIG_EN, 0,
+ TX_ISCSI_HDIG_EN, 0,
+ TX_DESCQ_BUF_BASE_ID, tx_queue->txd.index,
+ TX_DESCQ_EVQ_ID, tx_queue->channel->evqnum,
+ TX_DESCQ_OWNER_ID, 0,
+ TX_DESCQ_LABEL, tx_queue->queue,
+ TX_DESCQ_SIZE, FALCON_TXD_RING_ORDER,
+ TX_DESCQ_TYPE, 0,
+ TX_NON_IP_DROP_DIS_B0, 1);
+
+ if (FALCON_REV(efx) >= FALCON_REV_B0) {
+ int csum = !(efx->net_dev->features & NETIF_F_IP_CSUM);
+ EFX_SET_OWORD_FIELD(tx_desc_ptr, TX_IP_CHKSM_DIS_B0, csum);
+ EFX_SET_OWORD_FIELD(tx_desc_ptr, TX_TCP_CHKSM_DIS_B0, csum);
+ }
+
+ falcon_write_table(efx, &tx_desc_ptr, efx->type->txd_ptr_tbl_base,
+ tx_queue->queue);
+
+ if (FALCON_REV(efx) < FALCON_REV_B0) {
+ efx_oword_t reg;
+
+ BUG_ON(tx_queue->queue >= 128); /* HW limit */
+
+ falcon_read(efx, &reg, TX_CHKSM_CFG_REG_KER_A1);
+ if (efx->net_dev->features & NETIF_F_IP_CSUM)
+ clear_bit_le(tx_queue->queue, (void *)&reg);
+ else
+ set_bit_le(tx_queue->queue, (void *)&reg);
+ falcon_write(efx, &reg, TX_CHKSM_CFG_REG_KER_A1);
+ }
+
+ return 0;
+}
+
+/* Flush a TX descriptor queue and look for the hardware's confirmation
+ *
+ * Posts a flush command for @tx_queue, sleeps for FALCON_FLUSH_TIMEOUT
+ * milliseconds, then scans channel 0's event queue for the matching
+ * flush-done driver event. The scan works on a local copy of the read
+ * pointer, so no events are consumed here.
+ *
+ * Returns 0 if the flush was confirmed (or if workaround 7803 applies,
+ * in which case no confirmation is looked for, or if workaround 11557
+ * applies and the queue is found to be disabled anyway), and
+ * -ETIMEDOUT otherwise.
+ */
+static int falcon_flush_tx_queue(struct efx_tx_queue *tx_queue)
+{
+	struct efx_nic *efx = tx_queue->efx;
+	struct efx_channel *channel = &efx->channel[0];
+	efx_oword_t tx_flush_descq;
+	unsigned int read_ptr, i;
+
+	/* Post a flush command */
+	EFX_POPULATE_OWORD_2(tx_flush_descq,
+			     TX_FLUSH_DESCQ_CMD, 1,
+			     TX_FLUSH_DESCQ, tx_queue->queue);
+	falcon_write(efx, &tx_flush_descq, TX_FLUSH_DESCQ_REG_KER);
+	msleep(FALCON_FLUSH_TIMEOUT);
+
+	if (EFX_WORKAROUND_7803(efx))
+		return 0;
+
+	/* Look for a flush completed event */
+	read_ptr = channel->eventq_read_ptr;
+	for (i = 0; i < FALCON_EVQ_SIZE; ++i) {
+		efx_qword_t *event = falcon_event(channel, read_ptr);
+		int ev_code, ev_sub_code, ev_queue;
+		if (!falcon_event_present(event))
+			break;
+
+		ev_code = EFX_QWORD_FIELD(*event, EV_CODE);
+		ev_sub_code = EFX_QWORD_FIELD(*event, DRIVER_EV_SUB_CODE);
+		ev_queue = EFX_QWORD_FIELD(*event, DRIVER_EV_TX_DESCQ_ID);
+		/* The sub-code and queue id fields only have this
+		 * meaning in a driver event, so check the event code
+		 * first; otherwise unrelated events whose bits happen
+		 * to match would be mistaken for a flush completion.
+		 */
+		if ((ev_code == DRIVER_EV_DECODE) &&
+		    (ev_sub_code == TX_DESCQ_FLS_DONE_EV_DECODE) &&
+		    (ev_queue == tx_queue->queue)) {
+			EFX_LOG(efx, "tx queue %d flush command succesful\n",
+				tx_queue->queue);
+			return 0;
+		}
+
+		read_ptr = (read_ptr + 1) & FALCON_EVQ_MASK;
+	}
+
+	if (EFX_WORKAROUND_11557(efx)) {
+		efx_oword_t reg;
+		int enabled;
+
+		/* No event seen: fall back to checking whether the
+		 * queue has in fact been disabled in the descriptor
+		 * pointer table.
+		 */
+		falcon_read_table(efx, &reg, efx->type->txd_ptr_tbl_base,
+				  tx_queue->queue);
+		enabled = EFX_OWORD_FIELD(reg, TX_DESCQ_EN);
+		if (!enabled) {
+			EFX_LOG(efx, "tx queue %d disabled without a "
+				"flush event seen\n", tx_queue->queue);
+			return 0;
+		}
+	}
+
+	EFX_ERR(efx, "tx queue %d flush command timed out\n", tx_queue->queue);
+	return -ETIMEDOUT;
+}
+
+void falcon_fini_tx(struct efx_tx_queue *tx_queue)
+{
+ struct efx_nic *efx = tx_queue->efx;
+ efx_oword_t tx_desc_ptr;
+
+ /* Stop the hardware using the queue */
+ if (falcon_flush_tx_queue(tx_queue))
+ EFX_ERR(efx, "failed to flush tx queue %d\n", tx_queue->queue);
+
+ /* Remove TX descriptor ring from card */
+ EFX_ZERO_OWORD(tx_desc_ptr);
+ falcon_write_table(efx, &tx_desc_ptr, efx->type->txd_ptr_tbl_base,
+ tx_queue->queue);
+
+ /* Unpin TX descriptor ring */
+ falcon_fini_special_buffer(efx, &tx_queue->txd);
+}
+
+/* Release the special buffer that backs a TX queue's descriptor ring */
+void falcon_remove_tx(struct efx_tx_queue *tx_queue)
+{
+	struct efx_nic *efx = tx_queue->efx;
+
+	falcon_free_special_buffer(efx, &tx_queue->txd);
+}
+
+/**************************************************************************
+ *
+ * Falcon RX path
+ *
+ **************************************************************************/
+
+/* Look up entry @index of the RX descriptor ring, treating the ring's
+ * buffer as an array of efx_qword_t.
+ */
+static inline efx_qword_t *falcon_rx_desc(struct efx_rx_queue *rx_queue,
+					  unsigned int index)
+{
+	efx_qword_t *ring = (efx_qword_t *) (rx_queue->rxd.addr);
+
+	return &ring[index];
+}
+
+/* Fill in RX descriptor @index from the matching software RX buffer.
+ *
+ * The size written to the descriptor is the buffer length less the
+ * NIC type's rx_buffer_padding; the buffer region is always 0.
+ */
+static inline void falcon_build_rx_desc(struct efx_rx_queue *rx_queue,
+					unsigned index)
+{
+	efx_qword_t *desc = falcon_rx_desc(rx_queue, index);
+	struct efx_rx_buffer *buf = efx_rx_buffer(rx_queue, index);
+
+	EFX_POPULATE_QWORD_3(*desc,
+			     RX_KER_BUF_SIZE,
+			     buf->len - rx_queue->efx->type->rx_buffer_padding,
+			     RX_KER_BUF_REGION, 0,
+			     RX_KER_BUF_ADR, buf->dma_addr);
+}
+
+/* Publish newly added RX buffers to the hardware.
+ *
+ * Builds a descriptor for every buffer added since the last notification,
+ * then writes the new write pointer to the RX_DESC_WPTR register for this
+ * receive descriptor ring.
+ */
+void falcon_notify_rx_desc(struct efx_rx_queue *rx_queue)
+{
+	efx_dword_t wptr_reg;
+	unsigned wptr;
+
+	for (; rx_queue->notified_count != rx_queue->added_count;
+	     ++rx_queue->notified_count)
+		falcon_build_rx_desc(rx_queue,
+				     rx_queue->notified_count &
+				     FALCON_RXD_RING_MASK);
+
+	/* Descriptor writes must be ordered before the pointer update */
+	wmb();
+
+	wptr = rx_queue->added_count & FALCON_RXD_RING_MASK;
+	EFX_POPULATE_DWORD_1(wptr_reg, RX_DESC_WPTR_DWORD, wptr);
+	falcon_writel_page(rx_queue->efx, &wptr_reg,
+			   RX_DESC_UPD_REG_KER_DWORD, rx_queue->queue);
+}
+
+/* Allocate the special buffer for an RX descriptor ring
+ * (FALCON_RXD_RING_SIZE qword-sized descriptors).
+ * Returns the result of falcon_alloc_special_buffer().
+ */
+int falcon_probe_rx(struct efx_rx_queue *rx_queue)
+{
+	return falcon_alloc_special_buffer(rx_queue->efx, &rx_queue->rxd,
+					   FALCON_RXD_RING_SIZE *
+					   sizeof(efx_qword_t));
+}
+
+/* Bring up an RX queue on the NIC.
+ *
+ * Pins the descriptor ring's special buffer and writes the queue's entry
+ * in the RX descriptor pointer table. Returns 0 on success or the error
+ * from falcon_init_special_buffer().
+ */
+int falcon_init_rx(struct efx_rx_queue *rx_queue)
+{
+	struct efx_nic *efx = rx_queue->efx;
+	/* Revision B0 and later: iSCSI digests on, jumbo bit off */
+	int rev_b0_plus = FALCON_REV(efx) >= FALCON_REV_B0;
+	int digest_en = rev_b0_plus;
+	efx_oword_t ptr_tbl_entry;
+	int rc;
+
+	EFX_LOG(efx, "RX queue %d ring in special buffers %d-%d\n",
+		rx_queue->queue, rx_queue->rxd.index,
+		rx_queue->rxd.index + rx_queue->rxd.entries - 1);
+
+	/* Pin RX descriptor ring */
+	rc = falcon_init_special_buffer(efx, &rx_queue->rxd);
+	if (rc != 0)
+		return rc;
+
+	/* Push RX descriptor ring to card */
+	EFX_POPULATE_OWORD_10(ptr_tbl_entry,
+			      RX_ISCSI_DDIG_EN, digest_en,
+			      RX_ISCSI_HDIG_EN, digest_en,
+			      RX_DESCQ_BUF_BASE_ID, rx_queue->rxd.index,
+			      RX_DESCQ_EVQ_ID, rx_queue->channel->evqnum,
+			      RX_DESCQ_OWNER_ID, 0,
+			      RX_DESCQ_LABEL, rx_queue->queue,
+			      RX_DESCQ_SIZE, FALCON_RXD_RING_ORDER,
+			      RX_DESCQ_TYPE, 0 /* kernel queue */ ,
+			      /* For >=B0 this is scatter so disable */
+			      RX_DESCQ_JUMBO, !rev_b0_plus,
+			      RX_DESCQ_EN, 1);
+	falcon_write_table(efx, &ptr_tbl_entry,
+			   efx->type->rxd_ptr_tbl_base, rx_queue->queue);
+
+	return 0;
+}
+
+/* Flush an RX descriptor queue and look for the hardware's confirmation.
+ *
+ * Posts a flush command for @rx_queue, sleeps for FALCON_FLUSH_TIMEOUT and
+ * then scans channel 0's event queue from its current read pointer for the
+ * matching flush-done driver event. The scan uses a local read pointer and
+ * does not consume or clear any event.
+ *
+ * Returns 0 if the flush completed (also when workaround 7803 applies, or
+ * when workaround 11557 applies and the queue is found disabled), -EAGAIN
+ * if the hardware reported that the flush failed, and -ETIMEDOUT if no
+ * matching event was found.
+ */
+static int falcon_flush_rx_queue(struct efx_rx_queue *rx_queue)
+{
+	struct efx_nic *efx = rx_queue->efx;
+	struct efx_channel *channel = &efx->channel[0];
+	unsigned int read_ptr, i;
+	efx_oword_t rx_flush_descq;
+
+	/* Post a flush command */
+	EFX_POPULATE_OWORD_2(rx_flush_descq,
+			     RX_FLUSH_DESCQ_CMD, 1,
+			     RX_FLUSH_DESCQ, rx_queue->queue);
+	falcon_write(efx, &rx_flush_descq, RX_FLUSH_DESCQ_REG_KER);
+	msleep(FALCON_FLUSH_TIMEOUT);
+
+	if (EFX_WORKAROUND_7803(efx))
+		return 0;
+
+	/* Look for a flush completed event */
+	read_ptr = channel->eventq_read_ptr;
+	for (i = 0; i < FALCON_EVQ_SIZE; ++i) {
+		efx_qword_t *event = falcon_event(channel, read_ptr);
+		int ev_code, ev_sub_code, ev_queue, ev_failed;
+
+		if (!falcon_event_present(event))
+			break;
+
+		ev_code = EFX_QWORD_FIELD(*event, EV_CODE);
+		ev_sub_code = EFX_QWORD_FIELD(*event, DRIVER_EV_SUB_CODE);
+		ev_queue = EFX_QWORD_FIELD(*event, DRIVER_EV_RX_DESCQ_ID);
+		ev_failed = EFX_QWORD_FIELD(*event, DRIVER_EV_RX_FLUSH_FAIL);
+
+		/* The sub-code, queue and fail fields are driver-event
+		 * fields, so only trust them when the event code says this
+		 * is a driver event; otherwise an unrelated RX/TX event
+		 * whose bits happen to match would be taken as flush-done.
+		 */
+		if ((ev_code == DRIVER_EV_DECODE) &&
+		    (ev_sub_code == RX_DESCQ_FLS_DONE_EV_DECODE) &&
+		    (ev_queue == rx_queue->queue)) {
+			if (ev_failed) {
+				EFX_INFO(efx, "rx queue %d flush command "
+					 "failed\n", rx_queue->queue);
+				return -EAGAIN;
+			} else {
+				EFX_LOG(efx, "rx queue %d flush command "
+					"succesful\n", rx_queue->queue);
+				return 0;
+			}
+		}
+
+		read_ptr = (read_ptr + 1) & FALCON_EVQ_MASK;
+	}
+
+	/* No event seen: with workaround 11557 a queue that reads back as
+	 * disabled is treated as flushed.
+	 */
+	if (EFX_WORKAROUND_11557(efx)) {
+		efx_oword_t reg;
+		int enabled;
+
+		falcon_read_table(efx, &reg, efx->type->rxd_ptr_tbl_base,
+				  rx_queue->queue);
+		enabled = EFX_OWORD_FIELD(reg, RX_DESCQ_EN);
+		if (!enabled) {
+			EFX_LOG(efx, "rx queue %d disabled without a "
+				"flush event seen\n", rx_queue->queue);
+			return 0;
+		}
+	}
+
+	EFX_ERR(efx, "rx queue %d flush command timed out\n", rx_queue->queue);
+	return -ETIMEDOUT;
+}
+
+/* Shut down an RX queue on the NIC.
+ *
+ * Attempts the flush up to five times while it keeps returning -EAGAIN,
+ * logs an error if the final attempt did not succeed, then zeroes the
+ * queue's RX descriptor pointer table entry and unpins the ring buffer.
+ */
+void falcon_fini_rx(struct efx_rx_queue *rx_queue)
+{
+	struct efx_nic *efx = rx_queue->efx;
+	efx_oword_t cleared_entry;
+	int attempt, rc;
+
+	/* Try and flush the rx queue. This may need to be repeated */
+	for (attempt = 0; attempt < 5; attempt++) {
+		rc = falcon_flush_rx_queue(rx_queue);
+		if (rc != -EAGAIN)
+			break;
+	}
+	if (rc != 0)
+		EFX_ERR(efx, "failed to flush rx queue %d\n", rx_queue->queue);
+
+	/* Remove RX descriptor ring from card */
+	EFX_ZERO_OWORD(cleared_entry);
+	falcon_write_table(efx, &cleared_entry,
+			   efx->type->rxd_ptr_tbl_base, rx_queue->queue);
+
+	/* Unpin RX descriptor ring */
+	falcon_fini_special_buffer(efx, &rx_queue->rxd);
+}
+
+/* Release the special buffer that backs an RX queue's descriptor ring */
+void falcon_remove_rx(struct efx_rx_queue *rx_queue)
+{
+	struct efx_nic *efx = rx_queue->efx;
+
+	falcon_free_special_buffer(efx, &rx_queue->rxd);
+}
+
+/**************************************************************************
+ *
+ * Falcon event queue processing
+ * Event queues are processed by per-channel tasklets.
+ *
+ **************************************************************************/
+
+/* Acknowledge processed events by updating the event queue read pointer
+ *
+ * Writes channel->eventq_read_ptr to the channel's entry in the
+ * EVQ_RPTR_REG table.
+ *
+ * Note that EVQ_RPTR_REG contains the index of the "last read" event,
+ * whereas channel->eventq_read_ptr contains the index of the "next to
+ * read" event.
+ */
+void falcon_eventq_read_ack(struct efx_channel *channel)
+{
+	struct efx_nic *efx = channel->efx;
+	efx_dword_t rptr_reg;
+
+	EFX_POPULATE_DWORD_1(rptr_reg, EVQ_RPTR_DWORD,
+			     channel->eventq_read_ptr);
+	falcon_writel_table(efx, &rptr_reg,
+			    efx->type->evq_rptr_tbl_base, channel->evqnum);
+}
+
+/* Ask the hardware to insert a software-defined event
+ *
+ * Writes the whole of @event, together with the channel's event queue
+ * number, to DRV_EV_REG_KER.
+ */
+void falcon_generate_event(struct efx_channel *channel, efx_qword_t *event)
+{
+	struct efx_nic *efx = channel->efx;
+	efx_oword_t inject_cmd;
+
+	EFX_POPULATE_OWORD_2(inject_cmd,
+			     DRV_EV_QID, channel->evqnum,
+			     DRV_EV_DATA,
+			     EFX_QWORD_FIELD64(*event, WHOLE_EVENT));
+	falcon_write(efx, &inject_cmd, DRV_EV_REG_KER);
+}
+
+/* Dispatch a single TX event
+ *
+ * Falcon batches TX completion events; a completion means "complete all
+ * TX events up to this index". The other cases handled are a full write
+ * FIFO (the write pointer is rewritten) and, with workaround 10727, a
+ * packet error (a reset is scheduled). Anything else is logged.
+ */
+static inline void falcon_handle_tx_event(struct efx_channel *channel,
+					  efx_qword_t *event)
+{
+	struct efx_nic *efx = channel->efx;
+	struct efx_tx_queue *txq;
+	unsigned int label;
+
+	if (likely(EFX_QWORD_FIELD(*event, TX_EV_COMP))) {
+		/* Transmit completion */
+		unsigned int done_index;
+
+		done_index = EFX_QWORD_FIELD(*event, TX_EV_DESC_PTR);
+		label = EFX_QWORD_FIELD(*event, TX_EV_Q_LABEL);
+		txq = &efx->tx_queue[label];
+		efx_xmit_done(txq, done_index);
+		return;
+	}
+
+	if (EFX_QWORD_FIELD(*event, TX_EV_WQ_FF_FULL)) {
+		/* Rewrite the FIFO write pointer */
+		label = EFX_QWORD_FIELD(*event, TX_EV_Q_LABEL);
+		txq = &efx->tx_queue[label];
+
+		if (NET_DEV_REGISTERED(efx))
+			netif_tx_lock(efx->net_dev);
+		falcon_notify_tx_desc(txq);
+		if (NET_DEV_REGISTERED(efx))
+			netif_tx_unlock(efx->net_dev);
+		return;
+	}
+
+	if (EFX_QWORD_FIELD(*event, TX_EV_PKT_ERR) &&
+	    EFX_WORKAROUND_10727(efx)) {
+		efx_schedule_reset(efx, RESET_TYPE_TX_DESC_FETCH);
+		return;
+	}
+
+	EFX_ERR(efx, "channel %d unexpected TX event "
+		EFX_QWORD_FMT"\n", channel->channel,
+		EFX_QWORD_VAL(*event));
+}
+
+/* Decide whether a received frame is addressed to this interface.
+ *
+ * Returns 1 when the NIC is promiscuous or the frame's destination MAC
+ * equals the net device's address, 0 otherwise. The frame is located via
+ * the descriptor index carried in @event.
+ */
+static int check_dest_mac(struct efx_rx_queue *rx_queue,
+			  const efx_qword_t *event)
+{
+	struct efx_nic *efx = rx_queue->efx;
+	struct efx_rx_buffer *buf;
+	struct ethhdr *hdr;
+	int desc_index;
+
+	if (efx->promiscuous)
+		return 1;
+
+	desc_index = EFX_QWORD_FIELD(*event, RX_EV_DESC_PTR);
+	buf = efx_rx_buffer(rx_queue, desc_index);
+	hdr = (struct ethhdr *)buf->data;
+
+	return memcmp(hdr->h_dest, efx->net_dev->dev_addr, ETH_ALEN) == 0;
+}
+
+/* Detect errors included in the rx_evt_pkt_ok bit.
+ *
+ * Decodes the individual error flags from @event, increments the
+ * per-channel counters for the errors counted here, and stores a
+ * non-zero value in *@discard when the frame must be dropped.
+ * *@rx_ev_pkt_ok may be set to 1 by the workaround 5475 path below.
+ * @byte_count is not used in this function.
+ */
+static void falcon_handle_rx_not_ok(struct efx_rx_queue *rx_queue,
+ const efx_qword_t *event,
+ unsigned *rx_ev_pkt_ok,
+ int *discard, int byte_count)
+{
+ struct efx_nic *efx = rx_queue->efx;
+ unsigned rx_ev_buf_owner_id_err, rx_ev_ip_hdr_chksum_err;
+ unsigned rx_ev_tcp_udp_chksum_err, rx_ev_eth_crc_err;
+ unsigned rx_ev_frm_trunc, rx_ev_drib_nib, rx_ev_tobe_disc;
+ unsigned rx_ev_pkt_type, rx_ev_other_err, rx_ev_pause_frm;
+ unsigned rx_ev_ip_frag_err, rx_ev_hdr_type, rx_ev_mcast_pkt;
+ int snap, non_ip;
+
+ rx_ev_hdr_type = EFX_QWORD_FIELD(*event, RX_EV_HDR_TYPE);
+ rx_ev_mcast_pkt = EFX_QWORD_FIELD(*event, RX_EV_MCAST_PKT);
+ rx_ev_tobe_disc = EFX_QWORD_FIELD(*event, RX_EV_TOBE_DISC);
+ rx_ev_pkt_type = EFX_QWORD_FIELD(*event, RX_EV_PKT_TYPE);
+ rx_ev_buf_owner_id_err = EFX_QWORD_FIELD(*event,
+ RX_EV_BUF_OWNER_ID_ERR);
+ rx_ev_ip_frag_err = EFX_QWORD_FIELD(*event, RX_EV_IF_FRAG_ERR);
+ rx_ev_ip_hdr_chksum_err = EFX_QWORD_FIELD(*event,
+ RX_EV_IP_HDR_CHKSUM_ERR);
+ rx_ev_tcp_udp_chksum_err = EFX_QWORD_FIELD(*event,
+ RX_EV_TCP_UDP_CHKSUM_ERR);
+ rx_ev_eth_crc_err = EFX_QWORD_FIELD(*event, RX_EV_ETH_CRC_ERR);
+ rx_ev_frm_trunc = EFX_QWORD_FIELD(*event, RX_EV_FRM_TRUNC);
+ rx_ev_drib_nib = ((FALCON_REV(efx) >= FALCON_REV_B0) ?
+ 0 : EFX_QWORD_FIELD(*event, RX_EV_DRIB_NIB));
+ rx_ev_pause_frm = EFX_QWORD_FIELD(*event, RX_EV_PAUSE_FRM_ERR);
+
+ /* Every error apart from tobe_disc and pause_frm (ip_frag_err is
+ * not part of this mask either)
+ */
+ rx_ev_other_err = (rx_ev_drib_nib | rx_ev_tcp_udp_chksum_err |
+ rx_ev_buf_owner_id_err | rx_ev_eth_crc_err |
+ rx_ev_frm_trunc | rx_ev_ip_hdr_chksum_err);
+
+ snap = (rx_ev_pkt_type == RX_EV_PKT_TYPE_LLC_DECODE) ||
+ (rx_ev_pkt_type == RX_EV_PKT_TYPE_VLAN_LLC_DECODE);
+ non_ip = (rx_ev_hdr_type == RX_EV_HDR_TYPE_NON_IP_DECODE);
+
+ /* SFC bug 5475/8970: The Falcon XMAC incorrectly calculates the
+ * length field of an LLC frame, which sets TOBE_DISC. We could set
+ * PASS_LEN_ERR, but we want the MAC to filter out short frames (to
+ * protect the RX block).
+ *
+ * bug5475 - LLC/SNAP: Falcon identifies SNAP packets.
+ * bug8970 - LLC/noSNAP: Falcon does not provide an LLC flag.
+ * LLC can't encapsulate IP, so by definition
+ * these packets are NON_IP.
+ *
+ * Unicast mismatch will also cause TOBE_DISC, so the driver needs
+ * to check this.
+ */
+ if (EFX_WORKAROUND_5475(efx) && rx_ev_tobe_disc && (snap || non_ip)) {
+ /* If all the other flags are zero then we can state the
+ * entire packet is ok, which will flag to the kernel not
+ * to recalculate checksums.
+ */
+ if (!(non_ip | rx_ev_other_err | rx_ev_pause_frm))
+ *rx_ev_pkt_ok = 1;
+
+ rx_ev_tobe_disc = 0;
+
+ /* TOBE_DISC is set for unicast mismatch. But given that
+ * we can't trust TOBE_DISC here, we must validate the dest
+ * MAC address ourselves.
+ */
+ if (!rx_ev_mcast_pkt && !check_dest_mac(rx_queue, event))
+ rx_ev_tobe_disc = 1;
+ }
+
+ /* Count errors that are not in MAC stats. Only the first matching
+ * counter of the else-if chain is incremented; ip_frag_err is
+ * counted independently of that chain.
+ */
+ if (rx_ev_frm_trunc)
+ ++rx_queue->channel->n_rx_frm_trunc;
+ else if (rx_ev_tobe_disc)
+ ++rx_queue->channel->n_rx_tobe_disc;
+ else if (rx_ev_ip_hdr_chksum_err)
+ ++rx_queue->channel->n_rx_ip_hdr_chksum_err;
+ else if (rx_ev_tcp_udp_chksum_err)
+ ++rx_queue->channel->n_rx_tcp_udp_chksum_err;
+ if (rx_ev_ip_frag_err)
+ ++rx_queue->channel->n_rx_ip_frag_err;
+
+ /* The frame must be discarded if any of these are true. Checksum,
+ * IP fragment and buffer owner-id errors do not force a discard here.
+ */
+ *discard = (rx_ev_eth_crc_err | rx_ev_frm_trunc | rx_ev_drib_nib |
+ rx_ev_tobe_disc | rx_ev_pause_frm);
+
+ /* TOBE_DISC is expected on unicast mismatches; don't print out an
+ * error message. FRM_TRUNC indicates RXDP dropped the packet due
+ * to a FIFO overflow.
+ */
+#ifdef EFX_ENABLE_DEBUG
+ if (rx_ev_other_err) {
+ EFX_INFO_RL(efx, " RX queue %d unexpected RX event "
+ EFX_QWORD_FMT "%s%s%s%s%s%s%s%s%s\n",
+ rx_queue->queue, EFX_QWORD_VAL(*event),
+ rx_ev_buf_owner_id_err ? " [OWNER_ID_ERR]" : "",
+ rx_ev_ip_hdr_chksum_err ?
+ " [IP_HDR_CHKSUM_ERR]" : "",
+ rx_ev_tcp_udp_chksum_err ?
+ " [TCP_UDP_CHKSUM_ERR]" : "",
+ rx_ev_eth_crc_err ? " [ETH_CRC_ERR]" : "",
+ rx_ev_frm_trunc ? " [FRM_TRUNC]" : "",
+ rx_ev_drib_nib ? " [DRIB_NIB]" : "",
+ rx_ev_tobe_disc ? " [TOBE_DISC]" : "",
+ rx_ev_pause_frm ? " [PAUSE]" : "",
+ snap ? " [SNAP/LLC]" : "");
+ }
+#endif /* EFX_ENABLE_DEBUG */
+
+ if (unlikely(rx_ev_eth_crc_err && EFX_WORKAROUND_10750(efx) &&
+ efx->phy_type == PHY_TYPE_10XPRESS))
+ tenxpress_crc_err(efx);
+}
+
+/* React to an RX event whose descriptor index is not the next expected one.
+ *
+ * Logs how far @index is ahead of the expected ring position and schedules
+ * a reset; the reset type depends on workaround 5676.
+ */
+static void falcon_handle_rx_bad_index(struct efx_rx_queue *rx_queue,
+				       unsigned index)
+{
+	struct efx_nic *efx = rx_queue->efx;
+	unsigned want = rx_queue->removed_count & FALCON_RXD_RING_MASK;
+	unsigned gap = ((index + FALCON_RXD_RING_SIZE - want) &
+			FALCON_RXD_RING_MASK);
+
+	EFX_INFO(efx, "dropped %d events (index=%d expected=%d)\n",
+		 gap, index, want);
+
+	efx_schedule_reset(efx, EFX_WORKAROUND_5676(efx) ?
+			   RESET_TYPE_RX_RECOVERY : RESET_TYPE_DISABLE);
+}
+
+/* Handle a packet received event
+ *
+ * Falcon silicon gives a "discard" flag if it's a unicast packet with the
+ * wrong destination address.
+ * Also "is multicast" and "matches multicast filter" flags can be used to
+ * discard non-matching multicast packets.
+ *
+ * Returns the RX queue label taken from the event, including when the
+ * event arrives out of order and is handed to the bad-index handler.
+ */
+static inline int falcon_handle_rx_event(struct efx_channel *channel,
+					 const efx_qword_t *event)
+{
+	struct efx_nic *efx = channel->efx;
+	struct efx_rx_queue *rxq;
+	unsigned int label, desc_index, byte_count;
+	unsigned int pkt_ok, hdr_type;
+	unsigned next_index;
+	int checksummed;
+	int discard = 0;
+
+	/* Basic packet information */
+	byte_count = EFX_QWORD_FIELD(*event, RX_EV_BYTE_CNT);
+	pkt_ok = EFX_QWORD_FIELD(*event, RX_EV_PKT_OK);
+	hdr_type = EFX_QWORD_FIELD(*event, RX_EV_HDR_TYPE);
+	WARN_ON(EFX_QWORD_FIELD(*event, RX_EV_JUMBO_CONT));
+	WARN_ON(EFX_QWORD_FIELD(*event, RX_EV_SOP) != 1);
+
+	label = EFX_QWORD_FIELD(*event, RX_EV_Q_LABEL);
+	rxq = &efx->rx_queue[label];
+
+	/* Events must arrive in ring order */
+	desc_index = EFX_QWORD_FIELD(*event, RX_EV_DESC_PTR);
+	next_index = rxq->removed_count & FALCON_RXD_RING_MASK;
+	if (unlikely(desc_index != next_index)) {
+		falcon_handle_rx_bad_index(rxq, desc_index);
+		return label;
+	}
+
+	if (unlikely(!pkt_ok)) {
+		falcon_handle_rx_not_ok(rxq, event, &pkt_ok,
+					&discard, byte_count);
+		checksummed = 0;
+	} else {
+		/* If packet is marked as OK and packet type is TCP/IPv4 or
+		 * UDP/IPv4, then we can rely on the hardware checksum.
+		 */
+		checksummed = RX_EV_HDR_TYPE_HAS_CHECKSUMS(hdr_type);
+	}
+
+	/* Detect multicast packets that didn't match the filter */
+	if (EFX_QWORD_FIELD(*event, RX_EV_MCAST_PKT) &&
+	    unlikely(!EFX_QWORD_FIELD(*event, RX_EV_MCAST_HASH_MATCH)))
+		discard = 1;
+
+	/* Handle received packet */
+	efx_rx_packet(rxq, desc_index, byte_count, checksummed, discard);
+
+	return label;
+}
+
+/* Global events are basically PHY events
+ *
+ * A PHY interrupt on any port (or, on revision B0 and later, the
+ * XG_MNT_INTR_B0 flag) clears the PHY interrupt and queues the
+ * reconfigure work. An RX_RECOVERY event bumps efx->rx_reset and
+ * schedules a reset. An event matching neither is logged as unknown.
+ */
+static void falcon_handle_global_event(struct efx_channel *channel,
+				       efx_qword_t *event)
+{
+	struct efx_nic *efx = channel->efx;
+	int is_phy_event = 0, handled = 0;
+
+	/* Check for interrupt on either port. Some boards have a
+	 * single PHY wired to the interrupt line for port 1. */
+	if (EFX_QWORD_FIELD(*event, G_PHY0_INTR) ||
+	    EFX_QWORD_FIELD(*event, G_PHY1_INTR) ||
+	    EFX_QWORD_FIELD(*event, XG_PHY_INTR))
+		is_phy_event = 1;
+
+	/* @event is a qword, so use the qword accessor here as for every
+	 * other field read from it (the oword accessor was used before).
+	 */
+	if ((FALCON_REV(efx) >= FALCON_REV_B0) &&
+	    EFX_QWORD_FIELD(*event, XG_MNT_INTR_B0))
+		is_phy_event = 1;
+
+	if (is_phy_event) {
+		efx->phy_op->clear_interrupt(efx);
+		queue_work(efx->workqueue, &efx->reconfigure_work);
+		handled = 1;
+	}
+
+	if (EFX_QWORD_FIELD_VER(efx, *event, RX_RECOVERY)) {
+		EFX_ERR(efx, "channel %d seen global RX_RESET "
+			"event. Resetting.\n", channel->channel);
+
+		atomic_inc(&efx->rx_reset);
+		efx_schedule_reset(efx, EFX_WORKAROUND_6555(efx) ?
+				   RESET_TYPE_RX_RECOVERY : RESET_TYPE_DISABLE);
+		handled = 1;
+	}
+
+	if (!handled)
+		EFX_ERR(efx, "channel %d unknown global event "
+			EFX_QWORD_FMT "\n", channel->channel,
+			EFX_QWORD_VAL(*event));
+}
+
+/* Handle a driver event, dispatching on its sub-code.
+ *
+ * Most sub-codes are only traced or logged. RX_RECOVERY and the RX/TX
+ * descriptor fetch errors additionally schedule a reset.
+ */
+static void falcon_handle_driver_event(struct efx_channel *channel,
+				       efx_qword_t *event)
+{
+	struct efx_nic *efx = channel->efx;
+	unsigned int sub_code = EFX_QWORD_FIELD(*event, DRIVER_EV_SUB_CODE);
+	unsigned int sub_data = EFX_QWORD_FIELD(*event, DRIVER_EV_SUB_DATA);
+
+	switch (sub_code) {
+	case TX_DESCQ_FLS_DONE_EV_DECODE:
+		EFX_TRACE(efx, "channel %d TXQ %d flushed\n",
+			  channel->channel, sub_data);
+		break;
+
+	case RX_DESCQ_FLS_DONE_EV_DECODE:
+		EFX_TRACE(efx, "channel %d RXQ %d flushed\n",
+			  channel->channel, sub_data);
+		break;
+
+	case EVQ_INIT_DONE_EV_DECODE:
+		EFX_LOG(efx, "channel %d EVQ %d initialised\n",
+			channel->channel, sub_data);
+		break;
+
+	case SRM_UPD_DONE_EV_DECODE:
+		EFX_TRACE(efx, "channel %d SRAM update done\n",
+			  channel->channel);
+		break;
+
+	case WAKE_UP_EV_DECODE:
+		EFX_TRACE(efx, "channel %d RXQ %d wakeup event\n",
+			  channel->channel, sub_data);
+		break;
+
+	case TIMER_EV_DECODE:
+		EFX_TRACE(efx, "channel %d RX queue %d timer expired\n",
+			  channel->channel, sub_data);
+		break;
+
+	case RX_RECOVERY_EV_DECODE:
+		EFX_ERR(efx, "channel %d seen DRIVER RX_RESET event. "
+			"Resetting.\n", channel->channel);
+		efx_schedule_reset(efx,
+				   EFX_WORKAROUND_6555(efx) ?
+				   RESET_TYPE_RX_RECOVERY :
+				   RESET_TYPE_DISABLE);
+		break;
+
+	case RX_DSC_ERROR_EV_DECODE:
+		EFX_ERR(efx, "RX DMA Q %d reports descriptor fetch error."
+			" RX Q %d is disabled.\n", sub_data, sub_data);
+		efx_schedule_reset(efx, RESET_TYPE_RX_DESC_FETCH);
+		break;
+
+	case TX_DSC_ERROR_EV_DECODE:
+		EFX_ERR(efx, "TX DMA Q %d reports descriptor fetch error."
+			" TX Q %d is disabled.\n", sub_data, sub_data);
+		efx_schedule_reset(efx, RESET_TYPE_TX_DESC_FETCH);
+		break;
+
+	default:
+		EFX_TRACE(efx, "channel %d unknown driver event code %d "
+			  "data %04x\n", channel->channel, sub_code,
+			  sub_data);
+		break;
+	}
+}
+
+/* Process pending events on a channel's event queue.
+ *
+ * Starting at the channel's read pointer, each present event is copied,
+ * its slot is overwritten with all ones, and the copy is dispatched on
+ * its event code. Every RX event decrements *@rx_quota; the loop ends
+ * when no event is present or *@rx_quota reaches zero. The updated read
+ * pointer is stored back in the channel.
+ *
+ * Returns a bitmask with bit N set for each RX queue label N returned
+ * by the RX event handler.
+ */
+int falcon_process_eventq(struct efx_channel *channel, int *rx_quota)
+{
+	unsigned int rptr = channel->eventq_read_ptr;
+	int rx_queue_mask = 0;
+
+	for (;;) {
+		efx_qword_t *slot = falcon_event(channel, rptr);
+		efx_qword_t ev = *slot;
+		int code;
+		int label;
+
+		/* End of events */
+		if (!falcon_event_present(&ev))
+			break;
+
+		EFX_TRACE(channel->efx, "channel %d event is "EFX_QWORD_FMT"\n",
+			  channel->channel, EFX_QWORD_VAL(ev));
+
+		/* Clear this event by marking it all ones */
+		EFX_SET_QWORD(*slot);
+
+		code = EFX_QWORD_FIELD(ev, EV_CODE);
+		switch (code) {
+		case RX_IP_EV_DECODE:
+			label = falcon_handle_rx_event(channel, &ev);
+			rx_queue_mask |= (1 << label);
+			(*rx_quota)--;
+			break;
+		case TX_IP_EV_DECODE:
+			falcon_handle_tx_event(channel, &ev);
+			break;
+		case DRV_GEN_EV_DECODE:
+			channel->eventq_magic
+				= EFX_QWORD_FIELD(ev, EVQ_MAGIC);
+			EFX_LOG(channel->efx, "channel %d received generated "
+				"event "EFX_QWORD_FMT"\n", channel->channel,
+				EFX_QWORD_VAL(ev));
+			break;
+		case GLOBAL_EV_DECODE:
+			falcon_handle_global_event(channel, &ev);
+			break;
+		case DRIVER_EV_DECODE:
+			falcon_handle_driver_event(channel, &ev);
+			break;
+		default:
+			EFX_ERR(channel->efx, "channel %d unknown event type %d"
+				" (data " EFX_QWORD_FMT ")\n", channel->channel,
+				code, EFX_QWORD_VAL(ev));
+			break;
+		}
+
+		/* Increment read pointer */
+		rptr = (rptr + 1) & FALCON_EVQ_MASK;
+
+		/* Stop once the RX quota is used up */
+		if (!*rx_quota)
+			break;
+	}
+
+	channel->eventq_read_ptr = rptr;
+	return rx_queue_mask;
+}
+
+/* Program the channel's interrupt moderation timer.
+ *
+ * A zero channel->irq_moderation disables the timer. Otherwise the value
+ * is rounded down to a multiple of 5 (minimum 5), stored back in the
+ * channel, and programmed in hold-off mode.
+ */
+void falcon_set_int_moderation(struct efx_channel *channel)
+{
+	struct efx_nic *efx = channel->efx;
+	efx_dword_t timer_cmd;
+
+	/* Set timer register */
+	if (!channel->irq_moderation) {
+		EFX_POPULATE_DWORD_2(timer_cmd,
+				     TIMER_MODE, TIMER_MODE_DIS,
+				     TIMER_VAL, 0);
+	} else {
+		/* Round to resolution supported by hardware. The value we
+		 * program is based at 0. So actual interrupt moderation
+		 * achieved is ((x + 1) * res).
+		 */
+		unsigned int res = 5;
+
+		channel->irq_moderation -= (channel->irq_moderation % res);
+		if (channel->irq_moderation < res)
+			channel->irq_moderation = res;
+		EFX_POPULATE_DWORD_2(timer_cmd,
+				     TIMER_MODE, TIMER_MODE_INT_HLDOFF,
+				     TIMER_VAL,
+				     (channel->irq_moderation / res) - 1);
+	}
+
+	falcon_writel_page_locked(efx, &timer_cmd, TIMER_CMD_REG_KER,
+				  channel->evqnum);
+}
+
+/* Allocate the special buffer for a channel's event queue
+ * (FALCON_EVQ_SIZE qword-sized events).
+ */
+int falcon_probe_eventq(struct efx_channel *channel)
+{
+	unsigned int queue_bytes = FALCON_EVQ_SIZE * sizeof(efx_qword_t);
+
+	return falcon_alloc_special_buffer(channel->efx, &channel->eventq,
+					   queue_bytes);
+}
+
+/* Bring up a channel's event queue on the NIC.
+ *
+ * Pins the queue buffer, fills it with all-ones (empty) events, writes the
+ * queue's entry in the event queue pointer table and programs interrupt
+ * moderation. Returns 0 on success or the error from
+ * falcon_init_special_buffer().
+ */
+int falcon_init_eventq(struct efx_channel *channel)
+{
+	struct efx_nic *efx = channel->efx;
+	efx_oword_t ptr_tbl_entry;
+	int rc;
+
+	EFX_LOG(efx, "channel %d event queue in special buffers %d-%d\n",
+		channel->channel, channel->eventq.index,
+		channel->eventq.index + channel->eventq.entries - 1);
+
+	/* Pin event queue buffer */
+	rc = falcon_init_special_buffer(efx, &channel->eventq);
+	if (rc != 0)
+		return rc;
+
+	/* Fill event queue with all ones (i.e. empty events) */
+	memset(channel->eventq.addr, 0xff, channel->eventq.len);
+
+	/* Push event queue to card */
+	EFX_POPULATE_OWORD_3(ptr_tbl_entry,
+			     EVQ_EN, 1,
+			     EVQ_SIZE, FALCON_EVQ_ORDER,
+			     EVQ_BUF_BASE_ID, channel->eventq.index);
+	falcon_write_table(efx, &ptr_tbl_entry,
+			   efx->type->evq_ptr_tbl_base, channel->evqnum);
+
+	falcon_set_int_moderation(channel);
+
+	return 0;
+}
+
+/* Shut down a channel's event queue: zero its entry in the event queue
+ * pointer table, then unpin the queue buffer.
+ */
+void falcon_fini_eventq(struct efx_channel *channel)
+{
+	struct efx_nic *efx = channel->efx;
+	efx_oword_t cleared_entry;
+
+	/* Remove event queue from card */
+	EFX_ZERO_OWORD(cleared_entry);
+	falcon_write_table(efx, &cleared_entry,
+			   efx->type->evq_ptr_tbl_base, channel->evqnum);
+
+	/* Unpin event queue */
+	falcon_fini_special_buffer(efx, &channel->eventq);
+}
+
+/* Release the special buffer that backs a channel's event queue */
+void falcon_remove_eventq(struct efx_channel *channel)
+{
+	struct efx_nic *efx = channel->efx;
+
+	falcon_free_special_buffer(efx, &channel->eventq);
+}
+
+
+/* Inject a driver-generated test event carrying @magic.
+ *
+ * When the event queue is next processed, the DRV_GEN event handler
+ * copies the magic value into channel->eventq_magic.
+ */
+void falcon_generate_test_event(struct efx_channel *channel, unsigned int magic)
+{
+	efx_qword_t magic_event;
+
+	EFX_POPULATE_QWORD_2(magic_event,
+			     EV_CODE, DRV_GEN_EV_DECODE,
+			     EVQ_MAGIC, magic);
+
+	falcon_generate_event(channel, &magic_event);
+}
+
+
+/**************************************************************************
+ *
+ * Falcon hardware interrupts
+ * The hardware interrupt handler does very little work; all the event
+ * queue processing is carried out by per-channel tasklets.
+ *
+ **************************************************************************/
+
+/* Write the interrupt enable register.
+ * @enabled sets DRV_INT_EN_KER and @force sets KER_INT_KER.
+ */
+static inline void falcon_interrupts(struct efx_nic *efx, int enabled,
+				     int force)
+{
+	efx_oword_t int_en;
+
+	EFX_POPULATE_OWORD_2(int_en,
+			     KER_INT_KER, force,
+			     DRV_INT_EN_KER, enabled);
+
+	falcon_write(efx, &int_en, INT_EN_REG_KER);
+}
+
+/* Enable interrupts from the NIC.
+ *
+ * Clears the in-memory interrupt status vector, programs its DMA address
+ * into the NIC, enables interrupts and then schedules every channel that
+ * has an interrupt.
+ */
+void falcon_enable_interrupts(struct efx_nic *efx)
+{
+	efx_oword_t *int_ker = (efx_oword_t *) efx->irq_status.addr;
+	struct efx_channel *channel;
+	efx_oword_t int_adr;
+
+	EFX_ZERO_OWORD(*int_ker);
+	wmb(); /* Ensure interrupt vector is clear before interrupts enabled */
+
+	/* Program address */
+	EFX_POPULATE_OWORD_2(int_adr,
+			     NORM_INT_VEC_DIS_KER, EFX_INT_MODE_USE_MSI(efx),
+			     INT_ADR_KER, efx->irq_status.dma_addr);
+	falcon_write(efx, &int_adr, INT_ADR_REG_KER);
+
+	/* Enable interrupts */
+	falcon_interrupts(efx, 1, 0);
+
+	/* Force processing of all the channels to get the EVQ RPTRs up to
+	 * date
+	 */
+	efx_for_each_channel_with_interrupt(channel, efx)
+		efx_schedule_channel(channel);
+}
+
+/* Disable interrupts from the NIC (enable bit and force bit both clear) */
+void falcon_disable_interrupts(struct efx_nic *efx)
+{
+	const int enabled = 0;
+	const int force = 0;
+
+	falcon_interrupts(efx, enabled, force);
+}
+
+/* Force a test interrupt from Falcon
+ * Interrupts must already have been enabled, otherwise nasty things
+ * may happen.
+ */
+void falcon_generate_interrupt(struct efx_nic *efx)
+{
+	const int enabled = 1;
+	const int force = 1;
+
+	falcon_interrupts(efx, enabled, force);
+}
+
+/* Acknowledge a legacy (not MSI) interrupt via INT_ACK_REG_KER_A1
+ *
+ * Due to SFC bug 3706 (silicon revision <=A1) reads can be duplicated in
+ * the BIU. Interrupt acknowledge is read sensitive, so the acknowledge is
+ * done with a write of dummy data, followed by a read to ensure the BIU
+ * collector is flushed.
+ *
+ * NB most hardware supports MSI interrupts
+ */
+static inline void falcon_irq_ack_a1(struct efx_nic *efx)
+{
+	efx_dword_t ack;
+
+	/* Write-to-acknowledge with dummy data */
+	EFX_POPULATE_DWORD_1(ack, INT_ACK_DUMMY_DATA, 0xb7eb7e);
+	falcon_writel(efx, &ack, INT_ACK_REG_KER_A1);
+
+	/* Read back to flush the write */
+	falcon_readl(efx, &ack, WORK_AROUND_BROKEN_PCI_READS_REG_KER_A1);
+}
+
+/* Process a fatal interrupt
+ * Disable bus mastering ASAP and schedule a reset
+ *
+ * Reads the fatal interrupt register and logs it with the interrupt
+ * status vector. If no recognised error is flagged nothing else is done.
+ * Otherwise memory parity details are dumped when flagged, the PCI
+ * device(s) are disabled, and a reset is scheduled: a normal
+ * INT_ERROR reset until FALCON_MAX_INT_ERRORS is reached, after which
+ * the NIC is disabled. The error count is a function-static, so it is
+ * shared by every caller of this function.
+ *
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t falcon_fatal_interrupt(struct efx_nic *efx)
+{
+	struct falcon_nic_data *nic_data = efx->nic_data;
+	efx_oword_t *int_ker = (efx_oword_t *) efx->irq_status.addr;
+	efx_oword_t fatal_intr;
+	int error, mem_perr;
+	static int n_int_errors;
+
+	falcon_read(efx, &fatal_intr, FATAL_INTR_REG_KER);
+	error = EFX_OWORD_FIELD(fatal_intr, INT_KER_ERROR);
+
+	EFX_ERR(efx, "SYSTEM ERROR " EFX_OWORD_FMT " status "
+		EFX_OWORD_FMT ": %s\n", EFX_OWORD_VAL(*int_ker),
+		EFX_OWORD_VAL(fatal_intr),
+		error ? "disabling bus mastering" : "no recognised error");
+	if (error == 0)
+		goto out;
+
+	/* If this is a memory parity error dump which blocks are offending */
+	mem_perr = EFX_OWORD_FIELD(fatal_intr, MEM_PERR_INT_KER);
+	if (mem_perr) {
+		efx_oword_t reg;
+
+		falcon_read(efx, &reg, MEM_STAT_REG_KER);
+		EFX_ERR(efx, "SYSTEM ERROR: memory parity error "
+			EFX_OWORD_FMT "\n", EFX_OWORD_VAL(reg));
+	}
+
+	/* Disable DMA bus mastering on both devices */
+	pci_disable_device(efx->pci_dev);
+	if (FALCON_IS_DUAL_FUNC(efx))
+		pci_disable_device(nic_data->pci_dev2);
+
+	if (++n_int_errors < FALCON_MAX_INT_ERRORS) {
+		EFX_ERR(efx, "SYSTEM ERROR - reset scheduled\n");
+		efx_schedule_reset(efx, RESET_TYPE_INT_ERROR);
+	} else {
+		/* The two literals are concatenated, so the first one needs
+		 * a trailing space to keep the sentences apart in the log.
+		 */
+		EFX_ERR(efx, "SYSTEM ERROR - max number of errors seen. "
+			"NIC will be disabled\n");
+		efx_schedule_reset(efx, RESET_TYPE_DISABLE);
+	}
+out:
+	return IRQ_HANDLED;
+}
+
+/* Legacy interrupt handler for Falcon revision B0 and later
+ *
+ * Reading INT_ISR0_B0 both returns the set of interrupting queues and
+ * acknowledges the interrupt. A fatal error flagged in the status vector
+ * is handed to the fatal interrupt handler; otherwise each channel whose
+ * bit is set in the ISR is scheduled. Returns IRQ_NONE when no queue bit
+ * is set.
+ */
+static irqreturn_t falcon_legacy_interrupt_b0(int irq, void *dev_id)
+{
+	struct efx_nic *efx = dev_id;
+	efx_oword_t *int_ker = (efx_oword_t *) efx->irq_status.addr;
+	struct efx_channel *channel;
+	efx_dword_t isr;
+	u32 pending;
+
+	/* Read the ISR which also ACKs the interrupts */
+	falcon_readl(efx, &isr, INT_ISR0_B0);
+	pending = EFX_EXTRACT_DWORD(isr, 0, 31);
+
+	/* Check to see if we have a serious error condition */
+	if (unlikely(EFX_OWORD_FIELD(*int_ker, FATAL_INT)))
+		return falcon_fatal_interrupt(efx);
+
+	if (!pending)
+		return IRQ_NONE;
+
+	efx->last_irq_cpu = raw_smp_processor_id();
+	EFX_TRACE(efx, "IRQ %d on CPU %d status " EFX_DWORD_FMT "\n",
+		  irq, raw_smp_processor_id(), EFX_DWORD_VAL(isr));
+
+	/* Schedule processing of any interrupting queues */
+	for (channel = &efx->channel[0]; pending;
+	     channel++, pending >>= 1) {
+		if (pending & 0x01)
+			efx_schedule_channel(channel);
+	}
+
+	return IRQ_HANDLED;
+}
+
+
+/* Legacy interrupt handler for Falcon revisions before B0
+ *
+ * The in-memory status vector tells us whether the interrupt is ours; if
+ * it is all zero we return IRQ_NONE without touching the hardware. A fatal
+ * error is handed to the fatal interrupt handler. Otherwise the vector is
+ * read and cleared, the interrupt is acknowledged, and every channel
+ * whose bit was set is scheduled.
+ */
+static irqreturn_t falcon_legacy_interrupt_a1(int irq, void *dev_id)
+{
+	struct efx_nic *efx = dev_id;
+	efx_oword_t *int_ker = (efx_oword_t *) efx->irq_status.addr;
+	struct efx_channel *channel;
+	int pending;
+
+	/* Check to see if this is our interrupt. If it isn't, we
+	 * exit without having touched the hardware.
+	 */
+	if (unlikely(EFX_OWORD_IS_ZERO(*int_ker))) {
+		EFX_TRACE(efx, "IRQ %d on CPU %d not for me\n", irq,
+			  raw_smp_processor_id());
+		return IRQ_NONE;
+	}
+	efx->last_irq_cpu = raw_smp_processor_id();
+	EFX_TRACE(efx, "IRQ %d on CPU %d status " EFX_OWORD_FMT "\n",
+		  irq, raw_smp_processor_id(), EFX_OWORD_VAL(*int_ker));
+
+	/* Check to see if we have a serious error condition */
+	if (unlikely(EFX_OWORD_FIELD(*int_ker, FATAL_INT)))
+		return falcon_fatal_interrupt(efx);
+
+	/* Determine interrupting queues, clear interrupt status
+	 * register and acknowledge the device interrupt.
+	 */
+	BUILD_BUG_ON(INT_EVQS_WIDTH > EFX_MAX_CHANNELS);
+	pending = EFX_OWORD_FIELD(*int_ker, INT_EVQS);
+	EFX_ZERO_OWORD(*int_ker);
+	wmb(); /* Ensure the vector is cleared before interrupt ack */
+	falcon_irq_ack_a1(efx);
+
+	/* Schedule processing of any interrupting queues */
+	for (channel = &efx->channel[0]; pending;
+	     channel++, pending >>= 1) {
+		if (pending & 0x01)
+			efx_schedule_channel(channel);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/* MSI interrupt handler
+ *
+ * @dev_id is the channel the interrupt belongs to. The handler schedules
+ * event queue processing for that channel, unless the status vector flags
+ * a fatal error, in which case the fatal interrupt handler runs instead.
+ * No interrupt acknowledgement cycle is necessary. Also, we never need to
+ * check that the interrupt is for us, since MSI interrupts cannot be
+ * shared.
+ */
+static irqreturn_t falcon_msi_interrupt(int irq, void *dev_id)
+{
+	struct efx_channel *channel = dev_id;
+	struct efx_nic *efx = channel->efx;
+	efx_oword_t *int_ker = (efx_oword_t *) efx->irq_status.addr;
+
+	efx->last_irq_cpu = raw_smp_processor_id();
+	EFX_TRACE(efx, "IRQ %d on CPU %d status " EFX_OWORD_FMT "\n",
+		  irq, raw_smp_processor_id(), EFX_OWORD_VAL(*int_ker));
+
+	/* Check to see if we have a serious error condition */
+	if (unlikely(EFX_OWORD_FIELD(*int_ker, FATAL_INT)))
+		return falcon_fatal_interrupt(efx);
+
+	/* Schedule processing of the channel */
+	efx_schedule_channel(channel);
+
+	return IRQ_HANDLED;
+}
+
+
+/* Fill the RSS indirection table, which maps from the hash value of the
+ * packet to RXQ.
+ *
+ * Nothing is done on revisions before B0. Otherwise the table entries
+ * (0x800 bytes at a stride of 0x10) are filled round-robin with queue
+ * numbers 0 .. rss_queues - 1.
+ */
+static void falcon_setup_rss_indir_table(struct efx_nic *efx)
+{
+	efx_dword_t entry;
+	int i;
+
+	if (FALCON_REV(efx) < FALCON_REV_B0)
+		return;
+
+	for (i = 0; i < 0x800 / 0x10; i++) {
+		unsigned long offset = RX_RSS_INDIR_TBL_B0 + i * 0x10;
+
+		EFX_POPULATE_DWORD_1(entry, RX_RSS_INDIR_ENT_B0,
+				     i % efx->rss_queues);
+		falcon_writel(efx, &entry, offset);
+	}
+}
+
+/* Hook interrupt handler(s)
+ *
+ * In legacy interrupt mode a single shared handler is requested on
+ * efx->legacy_irq, chosen by silicon revision. Otherwise one MSI/MSI-X
+ * handler is requested per channel that has an interrupt.
+ *
+ * Returns 0 on success or the error from request_irq(). If hooking a
+ * channel's IRQ fails, only the IRQs hooked before it are released.
+ */
+int falcon_init_interrupt(struct efx_nic *efx)
+{
+	struct efx_channel *channel;
+	struct efx_channel *failed;
+	int still_hooked;
+	int rc;
+
+	if (!EFX_INT_MODE_USE_MSI(efx)) {
+		irq_handler_t handler;
+
+		if (FALCON_REV(efx) >= FALCON_REV_B0)
+			handler = falcon_legacy_interrupt_b0;
+		else
+			handler = falcon_legacy_interrupt_a1;
+
+		rc = request_irq(efx->legacy_irq, handler, IRQF_SHARED,
+				 efx->name, efx);
+		if (rc) {
+			/* Report the IRQ number that was actually requested */
+			EFX_ERR(efx, "failed to hook legacy IRQ %d\n",
+				efx->legacy_irq);
+			goto fail1;
+		}
+		return 0;
+	}
+
+	/* Hook MSI or MSI-X interrupt */
+	efx_for_each_channel_with_interrupt(channel, efx) {
+		rc = request_irq(channel->irq, falcon_msi_interrupt,
+				 IRQF_PROBE_SHARED, /* Not shared */
+				 efx->name, channel);
+		if (rc) {
+			EFX_ERR(efx, "failed to hook IRQ %d\n", channel->irq);
+			goto fail2;
+		}
+	}
+
+	return 0;
+
+ fail2:
+	/* Walk the channels in the same order as above and free only the
+	 * IRQs requested before the failing channel; calling free_irq()
+	 * on an IRQ that was never requested is invalid.
+	 */
+	failed = channel;
+	still_hooked = 1;
+	efx_for_each_channel_with_interrupt(channel, efx) {
+		if (channel == failed)
+			still_hooked = 0;
+		if (still_hooked)
+			free_irq(channel->irq, channel);
+	}
+ fail1:
+	return rc;
+}
+
+/* Unhook interrupt handler(s)
+ *
+ * Frees the IRQ of every channel with an interrupt that has a non-zero
+ * irq, acknowledges any legacy interrupt in the revision-specific way,
+ * and finally frees the legacy IRQ if one is set.
+ */
+void falcon_fini_interrupt(struct efx_nic *efx)
+{
+	struct efx_channel *channel;
+	efx_oword_t isr;
+
+	/* Disable MSI/MSI-X interrupts */
+	efx_for_each_channel_with_interrupt(channel, efx) {
+		if (channel->irq)
+			free_irq(channel->irq, channel);
+	}
+
+	/* ACK legacy interrupt */
+	if (FALCON_REV(efx) < FALCON_REV_B0)
+		falcon_irq_ack_a1(efx);
+	else
+		falcon_read(efx, &isr, INT_ISR0_B0);
+
+	/* Disable legacy interrupt */
+	if (efx->legacy_irq)
+		free_irq(efx->legacy_irq, efx);
+}
+
+/**************************************************************************
+ *
+ * EEPROM/flash
+ *
+ **************************************************************************
+ */
+
+#define FALCON_SPI_MAX_LEN sizeof(efx_oword_t)
+
+/* Wait for SPI command completion.
+ *
+ * Polls the SPI host command register until neither the command-enable
+ * nor the write-timer-active field is set.  Returns 0 once idle, or
+ * -ETIMEDOUT after 10000 polls spaced 10us apart (about 100ms).
+ */
+static int falcon_spi_wait(struct efx_nic *efx)
+{
+	efx_oword_t hcmd;
+	int attempt;
+
+	for (attempt = 0; attempt < 10000; attempt++) {
+		falcon_read(efx, &hcmd, EE_SPI_HCMD_REG_KER);
+		if (!EFX_OWORD_FIELD(hcmd, EE_SPI_HCMD_CMD_EN) &&
+		    !EFX_OWORD_FIELD(hcmd, EE_WR_TIMER_ACTIVE))
+			return 0;
+		udelay(10);
+	}
+
+	EFX_ERR(efx, "timed out waiting for SPI\n");
+	return -ETIMEDOUT;
+}
+
+/* Read up to FALCON_SPI_MAX_LEN bytes from an SPI device.
+ *
+ * @device_id: value written to the EE_SPI_HCMD_SF_SEL field
+ * @command: value written to the EE_SPI_HCMD_ENC field
+ * @address: value written to the SPI address register
+ * @addr_len: value written to the EE_SPI_HCMD_ADBCNT field
+ * @data: destination buffer; receives @len bytes on success
+ * @len: number of bytes to read; larger than FALCON_SPI_MAX_LEN is a BUG
+ *
+ * Returns 0 on success, or the error from falcon_spi_wait() if the
+ * interface is busy beforehand or the command does not complete.
+ */
+static int
+falcon_spi_read(struct efx_nic *efx, int device_id, unsigned int command,
+ unsigned int address, unsigned int addr_len,
+ void *data, unsigned int len)
+{
+ efx_oword_t reg;
+ int rc;
+
+ /* The result is copied out of a single oword data register */
+ BUG_ON(len > FALCON_SPI_MAX_LEN);
+
+ /* Check SPI not currently being accessed */
+ rc = falcon_spi_wait(efx);
+ if (rc)
+ return rc;
+
+ /* Program address register */
+ EFX_POPULATE_OWORD_1(reg, EE_SPI_HADR_ADR, address);
+ falcon_write(efx, &reg, EE_SPI_HADR_REG_KER);
+
+ /* Issue read command */
+ EFX_POPULATE_OWORD_7(reg,
+ EE_SPI_HCMD_CMD_EN, 1,
+ EE_SPI_HCMD_SF_SEL, device_id,
+ EE_SPI_HCMD_DABCNT, len,
+ EE_SPI_HCMD_READ, EE_SPI_READ,
+ EE_SPI_HCMD_DUBCNT, 0,
+ EE_SPI_HCMD_ADBCNT, addr_len,
+ EE_SPI_HCMD_ENC, command);
+ falcon_write(efx, &reg, EE_SPI_HCMD_REG_KER);
+
+ /* Wait for read to complete */
+ rc = falcon_spi_wait(efx);
+ if (rc)
+ return rc;
+
+ /* Read data */
+ falcon_read(efx, &reg, EE_SPI_HDATA_REG_KER);
+ memcpy(data, &reg, len);
+ return 0;
+}
+
+/**************************************************************************
+ *
+ * MAC wrapper
+ *
+ **************************************************************************
+ */
+/* Put the MAC into TX FIFO drain mode and reset the MAC and EM blocks.
+ *
+ * Does nothing on revisions older than B0, or if drain is already
+ * enabled.  The drain/reset sequence runs under efx->stats_lock.  If
+ * the link is up and workaround 5147 applies, the XAUI link is reset
+ * afterwards.
+ */
+void falcon_drain_tx_fifo(struct efx_nic *efx)
+{
+ efx_oword_t temp;
+ int count;
+
+ if (FALCON_REV(efx) < FALCON_REV_B0)
+ return;
+
+ falcon_read(efx, &temp, MAC0_CTRL_REG_KER);
+ /* There is no point in draining more than once */
+ if (EFX_OWORD_FIELD(temp, TXFIFO_DRAIN_EN_B0))
+ return;
+
+ /* MAC stats will fail whilst the TX fifo is draining. Serialise
+ * the drain sequence with the statistics fetch */
+ spin_lock(&efx->stats_lock);
+
+ EFX_SET_OWORD_FIELD(temp, TXFIFO_DRAIN_EN_B0, 1);
+ falcon_write(efx, &temp, MAC0_CTRL_REG_KER);
+
+ /* Reset the MAC and EM block. */
+ falcon_read(efx, &temp, GLB_CTL_REG_KER);
+ EFX_SET_OWORD_FIELD(temp, RST_XGTX, 1);
+ EFX_SET_OWORD_FIELD(temp, RST_XGRX, 1);
+ EFX_SET_OWORD_FIELD(temp, RST_EM, 1);
+ falcon_write(efx, &temp, GLB_CTL_REG_KER);
+
+ /* Poll until all three reset bits read back as clear, giving up
+ * (with an error message only) once count exceeds 20; polls are
+ * spaced 10us apart */
+ count = 0;
+ while (1) {
+ falcon_read(efx, &temp, GLB_CTL_REG_KER);
+ if (!EFX_OWORD_FIELD(temp, RST_XGTX) &&
+ !EFX_OWORD_FIELD(temp, RST_XGRX) &&
+ !EFX_OWORD_FIELD(temp, RST_EM)) {
+ EFX_LOG(efx, "Completed MAC reset after %d loops\n",
+ count);
+ break;
+ }
+ if (count > 20) {
+ EFX_ERR(efx, "MAC reset failed\n");
+ break;
+ }
+ count++;
+ udelay(10);
+ }
+
+ spin_unlock(&efx->stats_lock);
+
+ /* If we've reset the EM block and the link is up, then
+ * we'll have to kick the XAUI link so the PHY can recover */
+ if (efx->link_up && EFX_WORKAROUND_5147(efx))
+ falcon_reset_xaui(efx);
+}
+
+/* Isolate the MAC from the RX path and, if the link is down, drain the
+ * TX FIFO.  Does nothing on revisions older than B0.
+ */
+void falcon_deconfigure_mac_wrapper(struct efx_nic *efx)
+{
+ efx_oword_t temp;
+
+ if (FALCON_REV(efx) < FALCON_REV_B0)
+ return;
+
+ /* Isolate the MAC -> RX */
+ falcon_read(efx, &temp, RX_CFG_REG_KER);
+ EFX_SET_OWORD_FIELD(temp, RX_INGR_EN_B0, 0);
+ falcon_write(efx, &temp, RX_CFG_REG_KER);
+
+ if (!efx->link_up)
+ falcon_drain_tx_fifo(efx);
+}
+
+/* Reprogram the MAC wrapper from the current software state.
+ *
+ * Writes the MAC control register (speed derived from
+ * efx->link_options, promiscuity, and on B0 the TX drain enable),
+ * restores the multicast hash, and updates the RX configuration
+ * register (XOFF generation and, on B0, RX ingress enable).
+ */
+void falcon_reconfigure_mac_wrapper(struct efx_nic *efx)
+{
+ efx_oword_t reg;
+ int link_speed;
+ unsigned int tx_fc;
+
+ /* Encode the highest speed flagged in link_options for MAC_SPEED */
+ if (efx->link_options & GM_LPA_10000)
+ link_speed = 0x3;
+ else if (efx->link_options & GM_LPA_1000)
+ link_speed = 0x2;
+ else if (efx->link_options & GM_LPA_100)
+ link_speed = 0x1;
+ else
+ link_speed = 0x0;
+ /* MAC_LINK_STATUS controls MAC backpressure but doesn't work
+ * as advertised. Disable to ensure packets are not
+ * indefinitely held and TX queue can be flushed at any point
+ * while the link is down. */
+ EFX_POPULATE_OWORD_5(reg,
+ MAC_XOFF_VAL, 0xffff /* max pause time */,
+ MAC_BCAD_ACPT, 1,
+ MAC_UC_PROM, efx->promiscuous,
+ MAC_LINK_STATUS, 1, /* always set */
+ MAC_SPEED, link_speed);
+ /* On B0, MAC backpressure can be disabled and packets get
+ * discarded. */
+ if (FALCON_REV(efx) >= FALCON_REV_B0) {
+ EFX_SET_OWORD_FIELD(reg, TXFIFO_DRAIN_EN_B0,
+ !efx->link_up);
+ }
+
+ falcon_write(efx, &reg, MAC0_CTRL_REG_KER);
+
+ /* Restore the multicast hash registers. */
+ falcon_set_multicast_hash(efx);
+
+ /* Transmission of pause frames when RX crosses the threshold is
+ * covered by RX_XOFF_MAC_EN and XM_TX_CFG_REG:XM_FCNTL.
+ * Action on receipt of pause frames is controlled by XM_DIS_FCNTL */
+ tx_fc = (efx->flow_control & EFX_FC_TX) ? 1 : 0;
+ falcon_read(efx, &reg, RX_CFG_REG_KER);
+ EFX_SET_OWORD_FIELD_VER(efx, reg, RX_XOFF_MAC_EN, tx_fc);
+
+ /* Unisolate the MAC -> RX */
+ if (FALCON_REV(efx) >= FALCON_REV_B0)
+ EFX_SET_OWORD_FIELD(reg, RX_INGR_EN_B0, 1);
+ falcon_write(efx, &reg, RX_CFG_REG_KER);
+}
+
+/* Ask the hardware to DMA the MAC statistics into efx->stats_buffer
+ * and wait for it to finish.
+ *
+ * @done_offset: byte offset within the stats buffer of the 32-bit
+ * completion flag
+ *
+ * Returns 0 if the transfer completed, or was skipped because
+ * disable_dma_stats is set or (B0 and later) the MAC is in TX drain.
+ * Returns -ETIMEDOUT if the completion flag does not appear within
+ * 400 polls spaced 10us apart.
+ */
+int falcon_dma_stats(struct efx_nic *efx, unsigned int done_offset)
+{
+ efx_oword_t reg;
+ u32 *dma_done;
+ int i;
+
+ if (disable_dma_stats)
+ return 0;
+
+ /* Statistics fetch will fail if the MAC is in TX drain */
+ if (FALCON_REV(efx) >= FALCON_REV_B0) {
+ efx_oword_t temp;
+ falcon_read(efx, &temp, MAC0_CTRL_REG_KER);
+ if (EFX_OWORD_FIELD(temp, TXFIFO_DRAIN_EN_B0))
+ return 0;
+ }
+
+ /* Clear the completion flag before starting the transfer */
+ dma_done = (efx->stats_buffer.addr + done_offset);
+ *dma_done = FALCON_STATS_NOT_DONE;
+ wmb(); /* ensure done flag is clear */
+
+ /* Initiate DMA transfer of stats */
+ EFX_POPULATE_OWORD_2(reg,
+ MAC_STAT_DMA_CMD, 1,
+ MAC_STAT_DMA_ADR,
+ efx->stats_buffer.dma_addr);
+ falcon_write(efx, &reg, MAC0_STAT_DMA_REG_KER);
+
+ /* Wait for transfer to complete */
+ for (i = 0; i < 400; i++) {
+ /* volatile access forces a fresh read of the flag each poll */
+ if (*(volatile u32 *)dma_done == FALCON_STATS_DONE)
+ return 0;
+ udelay(10);
+ }
+
+ EFX_ERR(efx, "timed out waiting for statistics\n");
+ return -ETIMEDOUT;
+}
+
+/**************************************************************************
+ *
+ * PHY access via GMII
+ *
+ **************************************************************************
+ */
+
+/* Use the top bit of the MII PHY id to indicate the PHY type
+ * (1G/10G), with the remaining bits as the actual PHY id.
+ *
+ * This allows us to avoid leaking information from the mii_if_info
+ * structure into other data structures.
+ */
+#define FALCON_PHY_ID_ID_WIDTH EFX_WIDTH(MD_PRT_DEV_ADR)
+#define FALCON_PHY_ID_ID_MASK ((1 << FALCON_PHY_ID_ID_WIDTH) - 1)
+#define FALCON_PHY_ID_WIDTH (FALCON_PHY_ID_ID_WIDTH + 1)
+#define FALCON_PHY_ID_MASK ((1 << FALCON_PHY_ID_WIDTH) - 1)
+#define FALCON_PHY_ID_10G (1 << (FALCON_PHY_ID_WIDTH - 1))
+
+
+/* Packing the clause 45 port and device fields into a single value */
+#define MD_PRT_ADR_COMP_LBN (MD_PRT_ADR_LBN - MD_DEV_ADR_LBN)
+#define MD_PRT_ADR_COMP_WIDTH MD_PRT_ADR_WIDTH
+#define MD_DEV_ADR_COMP_LBN 0
+#define MD_DEV_ADR_COMP_WIDTH MD_DEV_ADR_WIDTH
+
+
+/* Wait for GMII access to complete.
+ *
+ * Polls the MDIO status register until the busy flag clears.  Returns
+ * 0 if the access finished cleanly, -EIO if the status then reports
+ * MD_LNFL or MD_BSERR, or -ETIMEDOUT after 1000 polls spaced 10us
+ * apart (about 10ms).
+ */
+static int falcon_gmii_wait(struct efx_nic *efx)
+{
+	efx_dword_t status;
+	int attempt;
+
+	for (attempt = 0; attempt < 1000; attempt++) {
+		falcon_readl(efx, &status, MD_STAT_REG_KER);
+
+		/* Still busy: pause and poll again */
+		if (EFX_DWORD_FIELD(status, MD_BSY) != 0) {
+			udelay(10);
+			continue;
+		}
+
+		if (EFX_DWORD_FIELD(status, MD_LNFL) == 0 &&
+		    EFX_DWORD_FIELD(status, MD_BSERR) == 0)
+			return 0;
+
+		EFX_ERR(efx, "error from GMII access "
+			EFX_DWORD_FMT"\n",
+			EFX_DWORD_VAL(status));
+		return -EIO;
+	}
+
+	EFX_ERR(efx, "timed out waiting for GMII\n");
+	return -ETIMEDOUT;
+}
+
+/* Writes a GMII register of a PHY connected to Falcon using MDIO.
+ *
+ * @phy_id: packed PHY id; the FALCON_PHY_ID_10G bit marks a clause 45
+ * request and the low FALCON_PHY_ID_ID_MASK bits hold the address
+ * @addr: register address
+ * @value: value to write
+ *
+ * The write is silently dropped if the PHY address is
+ * PHY_ADDR_INVALID or if the MDIO interface is still busy after
+ * falcon_gmii_wait().  No status is returned to the caller.
+ */
+static void falcon_mdio_write(struct net_device *net_dev, int phy_id,
+ int addr, int value)
+{
+ struct efx_nic *efx = (struct efx_nic *)net_dev->priv;
+ unsigned int phy_id2 = phy_id & FALCON_PHY_ID_ID_MASK;
+ efx_oword_t reg;
+
+ /* The 'generic' prt/dev packing in mdio_10g.h is conveniently
+ * chosen so that the only current user, Falcon, can take the
+ * packed value and use them directly.
+ * Fail to build if this assumption is broken.
+ */
+ BUILD_BUG_ON(FALCON_PHY_ID_10G != MDIO45_XPRT_ID_IS10G);
+ BUILD_BUG_ON(FALCON_PHY_ID_ID_WIDTH != MDIO45_PRT_DEV_WIDTH);
+ BUILD_BUG_ON(MD_PRT_ADR_COMP_LBN != MDIO45_PRT_ID_COMP_LBN);
+ BUILD_BUG_ON(MD_DEV_ADR_COMP_LBN != MDIO45_DEV_ID_COMP_LBN);
+
+ if (phy_id2 == PHY_ADDR_INVALID)
+ return;
+
+ /* See falcon_mdio_read for an explanation. */
+ if (!(phy_id & FALCON_PHY_ID_10G)) {
+ /* Redirect to the lowest-numbered MMD the PHY reports */
+ int mmd = ffs(efx->phy_op->mmds) - 1;
+ EFX_TRACE(efx, "Fixing erroneous clause22 write\n");
+ phy_id2 = mdio_clause45_pack(phy_id2, mmd)
+ & FALCON_PHY_ID_ID_MASK;
+ }
+
+ EFX_REGDUMP(efx, "writing GMII %d register %02x with %04x\n", phy_id,
+ addr, value);
+
+ spin_lock_bh(&efx->phy_lock);
+
+ /* Check MII not currently being accessed */
+ if (falcon_gmii_wait(efx) != 0)
+ goto out;
+
+ /* Write the address/ID register */
+ EFX_POPULATE_OWORD_1(reg, MD_PHY_ADR, addr);
+ falcon_write(efx, &reg, MD_PHY_ADR_REG_KER);
+
+ EFX_POPULATE_OWORD_1(reg, MD_PRT_DEV_ADR, phy_id2);
+ falcon_write(efx, &reg, MD_ID_REG_KER);
+
+ /* Write data */
+ EFX_POPULATE_OWORD_1(reg, MD_TXD, value);
+ falcon_write(efx, &reg, MD_TXD_REG_KER);
+
+ /* Start the write cycle */
+ EFX_POPULATE_OWORD_2(reg,
+ MD_WRC, 1,
+ MD_GC, 0);
+ falcon_write(efx, &reg, MD_CS_REG_KER);
+
+ /* Wait for data to be written */
+ if (falcon_gmii_wait(efx) != 0) {
+ /* Abort the write operation */
+ EFX_POPULATE_OWORD_2(reg,
+ MD_WRC, 0,
+ MD_GC, 1);
+ falcon_write(efx, &reg, MD_CS_REG_KER);
+ udelay(10);
+ }
+
+ out:
+ spin_unlock_bh(&efx->phy_lock);
+}
+
+/* Reads a GMII register from a PHY connected to Falcon.
+ *
+ * Returns the register value on success.  Returns -1 if the PHY
+ * address is PHY_ADDR_INVALID or the MDIO interface is busy before the
+ * request is issued.  If the read itself fails, the negative error
+ * from falcon_gmii_wait() is returned instead. */
+static int falcon_mdio_read(struct net_device *net_dev, int phy_id, int addr)
+{
+ struct efx_nic *efx = (struct efx_nic *)net_dev->priv;
+ unsigned int phy_addr = phy_id & FALCON_PHY_ID_ID_MASK;
+ efx_oword_t reg;
+ int value = -1;
+
+ if (phy_addr == PHY_ADDR_INVALID)
+ return -1;
+
+ /* Our PHY code knows whether it needs to talk clause 22(1G) or 45(10G)
+ * but the generic Linux code does not make any distinction or have
+ * any state for this.
+ * We spot the case where someone tried to talk 22 to a 45 PHY and
+ * redirect the request to the lowest numbered MMD as a clause45
+ * request. This is enough to allow simple queries like id and link
+ * state to succeed. TODO: We may need to do more in future.
+ */
+ if (!(phy_id & FALCON_PHY_ID_10G)) {
+ int mmd = ffs(efx->phy_op->mmds) - 1;
+ EFX_TRACE(efx, "Fixing erroneous clause22 read\n");
+ phy_addr = mdio_clause45_pack(phy_addr, mmd)
+ & FALCON_PHY_ID_ID_MASK;
+ }
+
+ spin_lock_bh(&efx->phy_lock);
+
+ /* Check MII not currently being accessed */
+ if (falcon_gmii_wait(efx) != 0)
+ goto out;
+
+ /* Write the address/ID registers */
+ EFX_POPULATE_OWORD_1(reg, MD_PHY_ADR, addr);
+ falcon_write(efx, &reg, MD_PHY_ADR_REG_KER);
+
+ EFX_POPULATE_OWORD_1(reg, MD_PRT_DEV_ADR, phy_addr);
+ falcon_write(efx, &reg, MD_ID_REG_KER);
+
+ /* Request data to be read */
+ EFX_POPULATE_OWORD_2(reg, MD_RDC, 1, MD_GC, 0);
+ falcon_write(efx, &reg, MD_CS_REG_KER);
+
+ /* Wait for data to become available */
+ value = falcon_gmii_wait(efx);
+ if (value == 0) {
+ falcon_read(efx, &reg, MD_RXD_REG_KER);
+ value = EFX_OWORD_FIELD(reg, MD_RXD);
+ EFX_REGDUMP(efx, "read from GMII %d register %02x, got %04x\n",
+ phy_id, addr, value);
+ } else {
+ /* Abort the read operation */
+ EFX_POPULATE_OWORD_2(reg,
+ MD_RIC, 0,
+ MD_GC, 1);
+ falcon_write(efx, &reg, MD_CS_REG_KER);
+
+ /* value still holds the error code from falcon_gmii_wait() */
+ EFX_LOG(efx, "read from GMII 0x%x register %02x, got "
+ "error %d\n", phy_id, addr, value);
+ }
+
+ out:
+ spin_unlock_bh(&efx->phy_lock);
+
+ return value;
+}
+
+/* Fill in the MDIO accessors and the PHY id / register number masks of
+ * a mii_if_info structure so that its users go through Falcon's MDIO
+ * interface.
+ */
+static void falcon_init_mdio(struct mii_if_info *gmii)
+{
+	/* Address masks */
+	gmii->reg_num_mask = ((1 << EFX_WIDTH(MD_PHY_ADR)) - 1);
+	gmii->phy_id_mask = FALCON_PHY_ID_MASK;
+
+	/* Register accessors */
+	gmii->mdio_write = falcon_mdio_write;
+	gmii->mdio_read = falcon_mdio_read;
+}
+
+/* Select the PHY operations table matching efx->phy_type.
+ *
+ * Returns 0 on success, or -ENODEV if the PHY type is not one this
+ * driver has operations for.
+ */
+static int falcon_probe_phy(struct efx_nic *efx)
+{
+	switch (efx->phy_type) {
+	case PHY_TYPE_10XPRESS:
+		efx->phy_op = &falcon_tenxpress_phy_ops;
+		break;
+	case PHY_TYPE_XFP:
+		efx->phy_op = &falcon_xfp_phy_ops;
+		break;
+	default:
+		EFX_ERR(efx, "Unknown PHY type %d\n",
+			efx->phy_type);
+		/* The result is propagated as an errno by
+		 * falcon_probe_port(), so return a real error code
+		 * rather than a bare -1 (which aliases -EPERM).
+		 */
+		return -ENODEV;
+	}
+	return 0;
+}
+
+/* This call is responsible for hooking in the MAC and PHY operations.
+ *
+ * It selects the PHY operations, initialises the MII/MDIO accessors,
+ * sets the default flow control mode and allocates the statistics DMA
+ * buffer.  Returns 0 on success, or the error from falcon_probe_phy()
+ * or falcon_alloc_buffer().
+ */
+int falcon_probe_port(struct efx_nic *efx)
+{
+ int rc;
+
+ /* Hook in PHY operations table */
+ rc = falcon_probe_phy(efx);
+ if (rc)
+ return rc;
+
+ /* Set up GMII structure for PHY */
+ efx->mii.supports_gmii = 1;
+ falcon_init_mdio(&efx->mii);
+
+ /* Hardware flow ctrl. FalconA RX FIFO too small for pause generation */
+ if (FALCON_REV(efx) >= FALCON_REV_B0)
+ efx->flow_control = EFX_FC_RX | EFX_FC_TX;
+ else
+ efx->flow_control = EFX_FC_RX;
+
+ /* Allocate buffer for stats */
+ rc = falcon_alloc_buffer(efx, &efx->stats_buffer,
+ FALCON_MAC_STATS_SIZE);
+ if (rc)
+ return rc;
+ EFX_LOG(efx, "stats buffer at %llx (virt %p phys %lx)\n",
+ (unsigned long long)efx->stats_buffer.dma_addr,
+ efx->stats_buffer.addr,
+ virt_to_phys(efx->stats_buffer.addr));
+
+ return 0;
+}
+
+/* Free the statistics buffer allocated by falcon_probe_port(). */
+void falcon_remove_port(struct efx_nic *efx)
+{
+ falcon_free_buffer(efx, &efx->stats_buffer);
+}
+
+/**************************************************************************
+ *
+ * Multicast filtering
+ *
+ **************************************************************************
+ */
+
+/* Write efx->multicast_hash to the two multicast hash registers.
+ *
+ * Note that the stored hash is modified as a side effect: bit 0xff is
+ * set so that broadcast packets are accepted.
+ */
+void falcon_set_multicast_hash(struct efx_nic *efx)
+{
+ union efx_multicast_hash *mc_hash = &efx->multicast_hash;
+
+ /* Broadcast packets go through the multicast hash filter.
+ * ether_crc_le() of the broadcast address is 0xbe2612ff
+ * so we always add bit 0xff to the mask.
+ */
+ set_bit_le(0xff, mc_hash->byte);
+
+ falcon_write(efx, &mc_hash->oword[0], MAC_MCAST_HASH_REG0_KER);
+ falcon_write(efx, &mc_hash->oword[1], MAC_MCAST_HASH_REG1_KER);
+}
+
+/**************************************************************************
+ *
+ * Device reset
+ *
+ **************************************************************************
+ */
+
+/* Resets NIC to known state. This routine must be called in process
+ * context and is allowed to sleep.
+ *
+ * @method: RESET_TYPE_WORLD saves the PCI state of the function(s)
+ * beforehand, issues a reset with nothing excluded and restores
+ * the PCI state afterwards.  Any other method excludes the PCIe
+ * and EE blocks from the reset; RESET_TYPE_INVISIBLE additionally
+ * excludes the external PHY.
+ *
+ * Returns 0 on success, the error from pci_save_state() or
+ * pci_restore_state(), or -ETIMEDOUT if SWRST is still set after the
+ * wait. */
+int falcon_reset_hw(struct efx_nic *efx, enum reset_type method)
+{
+ struct falcon_nic_data *nic_data = efx->nic_data;
+ efx_oword_t glb_ctl_reg_ker;
+ int rc;
+
+ EFX_LOG(efx, "performing hardware reset (%d)\n", method);
+
+ /* Initiate device reset */
+ if (method == RESET_TYPE_WORLD) {
+ rc = pci_save_state(efx->pci_dev);
+ if (rc) {
+ EFX_ERR(efx, "failed to backup PCI state of primary "
+ "function prior to hardware reset\n");
+ goto fail1;
+ }
+ if (FALCON_IS_DUAL_FUNC(efx)) {
+ rc = pci_save_state(nic_data->pci_dev2);
+ if (rc) {
+ EFX_ERR(efx, "failed to backup PCI state of "
+ "secondary function prior to "
+ "hardware reset\n");
+ goto fail2;
+ }
+ }
+
+ EFX_POPULATE_OWORD_2(glb_ctl_reg_ker,
+ EXT_PHY_RST_DUR, 0x7,
+ SWRST, 1);
+ } else {
+ /* Despite its name, a non-zero reset_phy excludes the PHY */
+ int reset_phy = (method == RESET_TYPE_INVISIBLE ?
+ EXCLUDE_FROM_RESET : 0);
+
+ EFX_POPULATE_OWORD_7(glb_ctl_reg_ker,
+ EXT_PHY_RST_CTL, reset_phy,
+ PCIE_CORE_RST_CTL, EXCLUDE_FROM_RESET,
+ PCIE_NSTCK_RST_CTL, EXCLUDE_FROM_RESET,
+ PCIE_SD_RST_CTL, EXCLUDE_FROM_RESET,
+ EE_RST_CTL, EXCLUDE_FROM_RESET,
+ EXT_PHY_RST_DUR, 0x7 /* 10ms */,
+ SWRST, 1);
+ }
+ falcon_write(efx, &glb_ctl_reg_ker, GLB_CTL_REG_KER);
+
+ EFX_LOG(efx, "waiting for hardware reset\n");
+ /* Sleep for HZ / 20 jiffies, i.e. roughly 50ms */
+ schedule_timeout_uninterruptible(HZ / 20);
+
+ /* Restore PCI configuration if needed */
+ if (method == RESET_TYPE_WORLD) {
+ if (FALCON_IS_DUAL_FUNC(efx)) {
+ rc = pci_restore_state(nic_data->pci_dev2);
+ if (rc) {
+ EFX_ERR(efx, "failed to restore PCI config for "
+ "the secondary function\n");
+ goto fail3;
+ }
+ }
+ rc = pci_restore_state(efx->pci_dev);
+ if (rc) {
+ EFX_ERR(efx, "failed to restore PCI config for the "
+ "primary function\n");
+ goto fail4;
+ }
+ EFX_LOG(efx, "successfully restored PCI config\n");
+ }
+
+ /* Assert that reset complete */
+ falcon_read(efx, &glb_ctl_reg_ker, GLB_CTL_REG_KER);
+ if (EFX_OWORD_FIELD(glb_ctl_reg_ker, SWRST) != 0) {
+ rc = -ETIMEDOUT;
+ EFX_ERR(efx, "timed out waiting for hardware reset\n");
+ goto fail5;
+ }
+ EFX_LOG(efx, "hardware reset complete\n");
+
+ return 0;
+
+ /* pci_save_state() and pci_restore_state() MUST be called in pairs */
+ /* fail2/fail3: the primary function's state was saved but has not
+ * been restored yet, so restore it here before returning */
+fail2:
+fail3:
+ pci_restore_state(efx->pci_dev);
+fail1:
+fail4:
+fail5:
+ return rc;
+}
+
+/* Zeroes out the SRAM contents. This routine must be called in
+ * process context and is allowed to sleep.
+ *
+ * Returns 0 once SRAM_OOB_BT_INIT_EN reads back as clear, or
+ * -ETIMEDOUT if it is still set after 20 polls.
+ */
+static int falcon_reset_sram(struct efx_nic *efx)
+{
+ efx_oword_t srm_cfg_reg_ker, gpio_cfg_reg_ker;
+ int count;
+
+ /* Set the SRAM wake/sleep GPIO appropriately. */
+ falcon_read(efx, &gpio_cfg_reg_ker, GPIO_CTL_REG_KER);
+ EFX_SET_OWORD_FIELD(gpio_cfg_reg_ker, GPIO1_OEN, 1);
+ EFX_SET_OWORD_FIELD(gpio_cfg_reg_ker, GPIO1_OUT, 1);
+ falcon_write(efx, &gpio_cfg_reg_ker, GPIO_CTL_REG_KER);
+
+ /* Initiate SRAM reset */
+ EFX_POPULATE_OWORD_2(srm_cfg_reg_ker,
+ SRAM_OOB_BT_INIT_EN, 1,
+ SRM_NUM_BANKS_AND_BANK_SIZE, 0);
+ falcon_write(efx, &srm_cfg_reg_ker, SRM_CFG_REG_KER);
+
+ /* Wait for SRAM reset to complete */
+ count = 0;
+ do {
+ EFX_LOG(efx, "waiting for SRAM reset (attempt %d)...\n", count);
+
+ /* SRAM reset is slow; expect around 16ms */
+ schedule_timeout_uninterruptible(HZ / 50);
+
+ /* Check for reset complete */
+ falcon_read(efx, &srm_cfg_reg_ker, SRM_CFG_REG_KER);
+ if (!EFX_OWORD_FIELD(srm_cfg_reg_ker, SRAM_OOB_BT_INIT_EN)) {
+ EFX_LOG(efx, "SRAM reset complete\n");
+
+ return 0;
+ }
+ } while (++count < 20); /* wait up to 0.4 sec */
+
+ EFX_ERR(efx, "timed out waiting for SRAM reset\n");
+ return -ETIMEDOUT;
+}
+
+/* Extract non-volatile configuration
+ *
+ * Reads the falcon_nvconfig structure from the boot device (SPI flash
+ * if present, otherwise SPI EEPROM) and uses it to set the MAC
+ * address, PHY type, PHY address and board revision.  If the board
+ * section has a bad magic number or a structure version below 2,
+ * defaults are used for the PHY and board revision instead.
+ *
+ * Returns 0 on success, -ENODEV if no boot device is present, -ENOMEM
+ * if the temporary buffer cannot be allocated, or the error from
+ * falcon_spi_read().
+ */
+static int falcon_probe_nvconfig(struct efx_nic *efx)
+{
+	struct falcon_nvconfig *nvconfig;
+	efx_oword_t nic_stat;
+	int device_id;
+	unsigned addr_len;
+	size_t offset, len;
+	int magic_num, struct_ver, board_rev;
+	int rc;
+
+	/* Find the boot device. */
+	falcon_read(efx, &nic_stat, NIC_STAT_REG);
+	if (EFX_OWORD_FIELD(nic_stat, SF_PRST)) {
+		device_id = EE_SPI_FLASH;
+		addr_len = 3;
+	} else if (EFX_OWORD_FIELD(nic_stat, EE_PRST)) {
+		device_id = EE_SPI_EEPROM;
+		addr_len = 2;
+	} else {
+		return -ENODEV;
+	}
+
+	nvconfig = kmalloc(sizeof(*nvconfig), GFP_KERNEL);
+	/* The buffer is written to below, so an allocation failure
+	 * must be caught here rather than dereferencing NULL.
+	 */
+	if (!nvconfig)
+		return -ENOMEM;
+
+	/* Read the whole configuration structure into memory, in
+	 * pieces of at most FALCON_SPI_MAX_LEN bytes.
+	 */
+	for (offset = 0; offset < sizeof(*nvconfig); offset += len) {
+		len = min(sizeof(*nvconfig) - offset,
+			  (size_t) FALCON_SPI_MAX_LEN);
+		rc = falcon_spi_read(efx, device_id, SPI_READ,
+				     NVCONFIG_BASE + offset, addr_len,
+				     (char *)nvconfig + offset, len);
+		if (rc)
+			goto out;
+	}
+
+	/* Read the MAC addresses */
+	memcpy(efx->mac_address, nvconfig->mac_address[0], ETH_ALEN);
+
+	/* Read the board configuration. */
+	magic_num = le16_to_cpu(nvconfig->board_magic_num);
+	struct_ver = le16_to_cpu(nvconfig->board_struct_ver);
+
+	if (magic_num != NVCONFIG_BOARD_MAGIC_NUM || struct_ver < 2) {
+		EFX_ERR(efx, "Non volatile memory bad magic=%x ver=%x "
+			"therefore using defaults\n", magic_num, struct_ver);
+		efx->phy_type = PHY_TYPE_NONE;
+		efx->mii.phy_id = PHY_ADDR_INVALID;
+		board_rev = 0;
+	} else {
+		struct falcon_nvconfig_board_v2 *v2 = &nvconfig->board_v2;
+
+		efx->phy_type = v2->port0_phy_type;
+		efx->mii.phy_id = v2->port0_phy_addr;
+		board_rev = le16_to_cpu(v2->board_revision);
+	}
+
+	EFX_LOG(efx, "PHY is %d phy_id %d\n", efx->phy_type, efx->mii.phy_id);
+
+	efx_set_board_info(efx, board_rev);
+
+ out:
+	kfree(nvconfig);
+	return rc;
+}
+
+/* Check that this NIC variant is one the driver supports.
+ *
+ * Rejects FPGA builds, revision A0 (or a revision reading as 0xff),
+ * revision A1 parts that are not strapped for PCIe or not strapped
+ * for 10G, and any unknown revision.  Returns 0 if the variant is
+ * acceptable, otherwise -ENODEV.
+ */
+static int falcon_probe_nic_variant(struct efx_nic *efx)
+{
+	efx_oword_t fpga_build;
+	efx_oword_t straps;
+
+	falcon_read(efx, &fpga_build, ALTERA_BUILD_REG_KER);
+	if (EFX_OWORD_FIELD(fpga_build, VER_ALL)) {
+		EFX_ERR(efx, "Falcon FPGA not supported\n");
+		return -ENODEV;
+	}
+
+	switch (FALCON_REV(efx)) {
+	case FALCON_REV_B0:
+		return 0;
+
+	case FALCON_REV_A1:
+		/* A1 is only usable in PCIe, 10G mode */
+		falcon_read(efx, &straps, NIC_STAT_REG);
+
+		if (!EFX_OWORD_FIELD(straps, STRAP_PCIE)) {
+			EFX_ERR(efx, "Falcon rev A1 PCI-X not supported\n");
+			return -ENODEV;
+		}
+		if (!EFX_OWORD_FIELD(straps, STRAP_10G)) {
+			EFX_ERR(efx, "1G mode not supported\n");
+			return -ENODEV;
+		}
+		return 0;
+
+	case FALCON_REV_A0:
+	case 0xff:
+		EFX_ERR(efx, "Falcon rev A0 not supported\n");
+		return -ENODEV;
+
+	default:
+		EFX_ERR(efx, "Unknown Falcon rev %d\n", FALCON_REV(efx));
+		return -ENODEV;
+	}
+}
+
+/* Probe the NIC: allocate the Falcon-private state, validate the
+ * hardware variant, locate the secondary PCI function when one is
+ * expected, reset the hardware, allocate the interrupt status buffer
+ * and read the non-volatile configuration.
+ *
+ * Returns 0 on success or a negative error code.  On failure all
+ * resources acquired here are released and efx->nic_data is NULL.
+ */
+int falcon_probe_nic(struct efx_nic *efx)
+{
+	struct falcon_nic_data *nic_data;
+	int rc;
+
+	/* Initialise I2C interface state */
+	efx->i2c.efx = efx;
+	efx->i2c.op = &falcon_i2c_bit_operations;
+	efx->i2c.sda = 1;
+	efx->i2c.scl = 1;
+
+	/* Allocate storage for hardware specific data */
+	nic_data = kzalloc(sizeof(*nic_data), GFP_KERNEL);
+	/* nic_data is dereferenced below, including on the error
+	 * paths, so allocation failure must be handled here.
+	 */
+	if (!nic_data)
+		return -ENOMEM;
+	efx->nic_data = (void *) nic_data;
+
+	/* Determine number of ports etc. */
+	rc = falcon_probe_nic_variant(efx);
+	if (rc)
+		goto fail1;
+
+	/* Probe secondary function if expected */
+	if (FALCON_IS_DUAL_FUNC(efx)) {
+		struct pci_dev *dev = pci_dev_get(efx->pci_dev);
+
+		/* Look for the device on the same bus at the next devfn */
+		while ((dev = pci_get_device(EFX_VENDID_SFC, FALCON_A_S_DEVID,
+					     dev))) {
+			if (dev->bus == efx->pci_dev->bus &&
+			    dev->devfn == efx->pci_dev->devfn + 1) {
+				nic_data->pci_dev2 = dev;
+				break;
+			}
+		}
+		if (!nic_data->pci_dev2) {
+			EFX_ERR(efx, "failed to find secondary function\n");
+			rc = -ENODEV;
+			goto fail2;
+		}
+	}
+
+	/* Now we can reset the NIC */
+	rc = falcon_reset_hw(efx, RESET_TYPE_ALL);
+	if (rc) {
+		EFX_ERR(efx, "failed to reset NIC\n");
+		goto fail3;
+	}
+
+	/* Allocate memory for INT_KER */
+	rc = falcon_alloc_buffer(efx, &efx->irq_status, sizeof(efx_oword_t));
+	if (rc)
+		goto fail4;
+	BUG_ON(efx->irq_status.dma_addr & 0x0f);
+
+	EFX_LOG(efx, "INT_KER at %llx (virt %p phys %lx)\n",
+		(unsigned long long)efx->irq_status.dma_addr,
+		efx->irq_status.addr, virt_to_phys(efx->irq_status.addr));
+
+	/* Read in the non-volatile configuration */
+	rc = falcon_probe_nvconfig(efx);
+	if (rc)
+		goto fail5;
+
+	return 0;
+
+ fail5:
+	falcon_free_buffer(efx, &efx->irq_status);
+ fail4:
+	/* fall-thru */
+ fail3:
+	if (nic_data->pci_dev2) {
+		pci_dev_put(nic_data->pci_dev2);
+		nic_data->pci_dev2 = NULL;
+	}
+ fail2:
+	/* fall-thru */
+ fail1:
+	kfree(efx->nic_data);
+	/* Do not leave a dangling pointer to the freed state */
+	efx->nic_data = NULL;
+	return rc;
+}
+
+/* This call performs hardware-specific global initialisation, such as
+ * defining the descriptor cache sizes and number of RSS channels.
+ * It does not set up any buffers, descriptor rings or event queues.
+ *
+ * Returns 0 on success, or the error from falcon_reset_sram().
+ */
+int falcon_init_nic(struct efx_nic *efx)
+{
+ struct falcon_nic_data *data;
+ efx_oword_t temp;
+ unsigned thresh;
+ int rc;
+
+ /* NOTE(review): 'data' is assigned here but not otherwise used in
+ * this function */
+ data = (struct falcon_nic_data *)efx->nic_data;
+
+ /* Set up the address region register. This is only needed
+ * for the B0 FPGA, but since we are just pushing in the
+ * reset defaults this may as well be unconditional. */
+ EFX_POPULATE_OWORD_4(temp, ADR_REGION0, 0,
+ ADR_REGION1, (1 << 16),
+ ADR_REGION2, (2 << 16),
+ ADR_REGION3, (3 << 16));
+ falcon_write(efx, &temp, ADR_REGION_REG_KER);
+
+ /* Use on-chip SRAM */
+ falcon_read(efx, &temp, NIC_STAT_REG);
+ EFX_SET_OWORD_FIELD(temp, ONCHIP_SRAM, 1);
+ falcon_write(efx, &temp, NIC_STAT_REG);
+
+ /* Set buffer table mode */
+ EFX_POPULATE_OWORD_1(temp, BUF_TBL_MODE, BUF_TBL_MODE_FULL);
+ falcon_write(efx, &temp, BUF_TBL_CFG_REG_KER);
+
+ /* Zero the SRAM; this sleeps, and failure aborts initialisation */
+ rc = falcon_reset_sram(efx);
+ if (rc)
+ return rc;
+
+ /* Set positions of descriptor caches in SRAM. */
+ EFX_POPULATE_OWORD_1(temp, SRM_TX_DC_BASE_ADR, TX_DC_BASE / 8);
+ falcon_write(efx, &temp, SRM_TX_DC_CFG_REG_KER);
+ EFX_POPULATE_OWORD_1(temp, SRM_RX_DC_BASE_ADR, RX_DC_BASE / 8);
+ falcon_write(efx, &temp, SRM_RX_DC_CFG_REG_KER);
+
+ /* Set TX descriptor cache size. */
+ BUILD_BUG_ON(TX_DC_ENTRIES != (16 << TX_DC_ENTRIES_ORDER));
+ EFX_POPULATE_OWORD_1(temp, TX_DC_SIZE, TX_DC_ENTRIES_ORDER);
+ falcon_write(efx, &temp, TX_DC_CFG_REG_KER);
+
+ /* Set RX descriptor cache size. Set low watermark to size-8, as
+ * this allows most efficient prefetching.
+ */
+ BUILD_BUG_ON(RX_DC_ENTRIES != (16 << RX_DC_ENTRIES_ORDER));
+ EFX_POPULATE_OWORD_1(temp, RX_DC_SIZE, RX_DC_ENTRIES_ORDER);
+ falcon_write(efx, &temp, RX_DC_CFG_REG_KER);
+ EFX_POPULATE_OWORD_1(temp, RX_DC_PF_LWM, RX_DC_ENTRIES - 8);
+ falcon_write(efx, &temp, RX_DC_PF_WM_REG_KER);
+
+ /* Clear the parity enables on the TX data fifos as
+ * they produce false parity errors because of timing issues
+ */
+ if (EFX_WORKAROUND_5129(efx)) {
+ falcon_read(efx, &temp, SPARE_REG_KER);
+ EFX_SET_OWORD_FIELD(temp, MEM_PERR_EN_TX_DATA, 0);
+ falcon_write(efx, &temp, SPARE_REG_KER);
+ }
+
+ /* Enable all the genuinely fatal interrupts. (They are still
+ * masked by the overall interrupt mask, controlled by
+ * falcon_interrupts()).
+ *
+ * Note: All other fatal interrupts are enabled
+ */
+ EFX_POPULATE_OWORD_3(temp,
+ ILL_ADR_INT_KER_EN, 1,
+ RBUF_OWN_INT_KER_EN, 1,
+ TBUF_OWN_INT_KER_EN, 1);
+ /* The three fields set above end up clear and every other bit of
+ * the value written ends up set */
+ EFX_INVERT_OWORD(temp);
+ falcon_write(efx, &temp, FATAL_INTR_REG_KER);
+
+ /* Set number of RSS queues for receive path. */
+ falcon_read(efx, &temp, RX_FILTER_CTL_REG);
+ if (FALCON_REV(efx) >= FALCON_REV_B0)
+ EFX_SET_OWORD_FIELD(temp, NUM_KER, 0);
+ else
+ EFX_SET_OWORD_FIELD(temp, NUM_KER, efx->rss_queues - 1);
+ if (EFX_WORKAROUND_7244(efx)) {
+ EFX_SET_OWORD_FIELD(temp, UDP_FULL_SRCH_LIMIT, 8);
+ EFX_SET_OWORD_FIELD(temp, UDP_WILD_SRCH_LIMIT, 8);
+ EFX_SET_OWORD_FIELD(temp, TCP_FULL_SRCH_LIMIT, 8);
+ EFX_SET_OWORD_FIELD(temp, TCP_WILD_SRCH_LIMIT, 8);
+ }
+ falcon_write(efx, &temp, RX_FILTER_CTL_REG);
+
+ falcon_setup_rss_indir_table(efx);
+
+ /* Setup RX. Wait for descriptor is broken and must
+ * be disabled. RXDP recovery shouldn't be needed, but is.
+ */
+ falcon_read(efx, &temp, RX_SELF_RST_REG_KER);
+ EFX_SET_OWORD_FIELD(temp, RX_NODESC_WAIT_DIS, 1);
+ EFX_SET_OWORD_FIELD(temp, RX_RECOVERY_EN, 1);
+ if (EFX_WORKAROUND_5583(efx))
+ EFX_SET_OWORD_FIELD(temp, RX_ISCSI_DIS, 1);
+ falcon_write(efx, &temp, RX_SELF_RST_REG_KER);
+
+ /* Disable the ugly timer-based TX DMA backoff and allow TX DMA to be
+ * controlled by the RX FIFO fill level. Set arbitration to one pkt/Q.
+ */
+ falcon_read(efx, &temp, TX_CFG2_REG_KER);
+ EFX_SET_OWORD_FIELD(temp, TX_RX_SPACER, 0xfe);
+ EFX_SET_OWORD_FIELD(temp, TX_RX_SPACER_EN, 1);
+ EFX_SET_OWORD_FIELD(temp, TX_ONE_PKT_PER_Q, 1);
+ EFX_SET_OWORD_FIELD(temp, TX_CSR_PUSH_EN, 0);
+ EFX_SET_OWORD_FIELD(temp, TX_DIS_NON_IP_EV, 1);
+ /* Enable SW_EV to inherit in char driver - assume harmless here */
+ EFX_SET_OWORD_FIELD(temp, TX_SW_EV_EN, 1);
+ /* Prefetch threshold 2 => fetch when descriptor cache half empty */
+ EFX_SET_OWORD_FIELD(temp, TX_PREF_THRESHOLD, 2);
+ /* Squash TX of packets of 16 bytes or less */
+ if (FALCON_REV(efx) >= FALCON_REV_B0 && EFX_WORKAROUND_9141(efx))
+ EFX_SET_OWORD_FIELD(temp, TX_FLUSH_MIN_LEN_EN_B0, 1);
+ falcon_write(efx, &temp, TX_CFG2_REG_KER);
+
+ /* Do not enable TX_NO_EOP_DISC_EN, since it limits packets to 16
+ * descriptors (which is bad).
+ */
+ falcon_read(efx, &temp, TX_CFG_REG_KER);
+ EFX_SET_OWORD_FIELD(temp, TX_NO_EOP_DISC_EN, 0);
+ falcon_write(efx, &temp, TX_CFG_REG_KER);
+
+ /* RX config */
+ falcon_read(efx, &temp, RX_CFG_REG_KER);
+ EFX_SET_OWORD_FIELD_VER(efx, temp, RX_DESC_PUSH_EN, 0);
+ if (EFX_WORKAROUND_7575(efx))
+ EFX_SET_OWORD_FIELD_VER(efx, temp, RX_USR_BUF_SIZE,
+ (3 * 4096) / 32);
+ if (FALCON_REV(efx) >= FALCON_REV_B0)
+ EFX_SET_OWORD_FIELD(temp, RX_INGR_EN_B0, 1);
+
+ /* RX FIFO flow control thresholds */
+ /* A non-negative rx_xon_thresh_bytes / rx_xoff_thresh_bytes
+ * overrides the per-NIC-type default; the register fields take
+ * the threshold divided by 256 */
+ thresh = ((rx_xon_thresh_bytes >= 0) ?
+ rx_xon_thresh_bytes : efx->type->rx_xon_thresh);
+ EFX_SET_OWORD_FIELD_VER(efx, temp, RX_XON_MAC_TH, thresh / 256);
+ thresh = ((rx_xoff_thresh_bytes >= 0) ?
+ rx_xoff_thresh_bytes : efx->type->rx_xoff_thresh);
+ EFX_SET_OWORD_FIELD_VER(efx, temp, RX_XOFF_MAC_TH, thresh / 256);
+ /* RX control FIFO thresholds [32 entries] */
+ EFX_SET_OWORD_FIELD_VER(efx, temp, RX_XON_TX_TH, 25);
+ EFX_SET_OWORD_FIELD_VER(efx, temp, RX_XOFF_TX_TH, 20);
+ falcon_write(efx, &temp, RX_CFG_REG_KER);
+
+ /* Set destination of both TX and RX Flush events */
+ if (FALCON_REV(efx) >= FALCON_REV_B0) {
+ EFX_POPULATE_OWORD_1(temp, FLS_EVQ_ID, 0);
+ falcon_write(efx, &temp, DP_CTRL_REG);
+ }
+
+ return 0;
+}
+
+/* Undo falcon_probe_nic(): free the interrupt status buffer, reset the
+ * hardware, drop the reference on the secondary PCI function (if any)
+ * and free the Falcon-private state.
+ */
+void falcon_remove_nic(struct efx_nic *efx)
+{
+ struct falcon_nic_data *nic_data = efx->nic_data;
+
+ falcon_free_buffer(efx, &efx->irq_status);
+
+ /* The result of the reset is deliberately ignored */
+ (void) falcon_reset_hw(efx, RESET_TYPE_ALL);
+
+ /* Release the second function after the reset */
+ if (nic_data->pci_dev2) {
+ pci_dev_put(nic_data->pci_dev2);
+ nic_data->pci_dev2 = NULL;
+ }
+
+ /* Tear down the private nic state */
+ kfree(efx->nic_data);
+ efx->nic_data = NULL;
+}
+
+/* Read the RX no-descriptor drop counter register and add its count
+ * field to the running total in efx->n_rx_nodesc_drop_cnt.
+ */
+void falcon_update_nic_stats(struct efx_nic *efx)
+{
+	efx_oword_t drop_reg;
+
+	falcon_read(efx, &drop_reg, RX_NODESC_DROP_REG_KER);
+	efx->n_rx_nodesc_drop_cnt +=
+		EFX_OWORD_FIELD(drop_reg, RX_NODESC_DROP_CNT);
+}
+
+/**************************************************************************
+ *
+ * Revision-dependent attributes used by efx.c
+ *
+ **************************************************************************
+ */
+
+/* Attributes of the "A" NIC type: uses the *_A1 register table bases,
+ * a fixed 0x20000-byte memory map, and at most MSI interrupts.
+ */
+struct efx_nic_type falcon_a_nic_type = {
+ .mem_bar = 2,
+ .mem_map_size = 0x20000,
+ .txd_ptr_tbl_base = TX_DESC_PTR_TBL_KER_A1,
+ .rxd_ptr_tbl_base = RX_DESC_PTR_TBL_KER_A1,
+ .buf_tbl_base = BUF_TBL_KER_A1,
+ .evq_ptr_tbl_base = EVQ_PTR_TBL_KER_A1,
+ .evq_rptr_tbl_base = EVQ_RPTR_REG_KER_A1,
+ .txd_ring_mask = FALCON_TXD_RING_MASK,
+ .rxd_ring_mask = FALCON_RXD_RING_MASK,
+ .evq_size = FALCON_EVQ_SIZE,
+ .max_dma_mask = FALCON_DMA_MASK,
+ .tx_dma_mask = FALCON_TX_DMA_MASK,
+ .bug5391_mask = 0xf,
+ .rx_xoff_thresh = 2048,
+ .rx_xon_thresh = 512,
+ .rx_buffer_padding = 0x24,
+ .max_interrupt_mode = EFX_INT_MODE_MSI,
+ .phys_addr_channels = 4,
+};
+
+/* Attributes of the "B" NIC type: uses the *_B0 register table bases,
+ * maps registers up to the end of the RSS indirection table, and
+ * supports up to MSI-X interrupts.
+ */
+struct efx_nic_type falcon_b_nic_type = {
+ .mem_bar = 2,
+ /* Map everything up to and including the RSS indirection
+ * table. Don't map MSI-X table, MSI-X PBA since Linux
+ * requires that they not be mapped. */
+ .mem_map_size = RX_RSS_INDIR_TBL_B0 + 0x800,
+ .txd_ptr_tbl_base = TX_DESC_PTR_TBL_KER_B0,
+ .rxd_ptr_tbl_base = RX_DESC_PTR_TBL_KER_B0,
+ .buf_tbl_base = BUF_TBL_KER_B0,
+ .evq_ptr_tbl_base = EVQ_PTR_TBL_KER_B0,
+ .evq_rptr_tbl_base = EVQ_RPTR_REG_KER_B0,
+ .txd_ring_mask = FALCON_TXD_RING_MASK,
+ .rxd_ring_mask = FALCON_RXD_RING_MASK,
+ .evq_size = FALCON_EVQ_SIZE,
+ .max_dma_mask = FALCON_DMA_MASK,
+ .tx_dma_mask = FALCON_TX_DMA_MASK,
+ .bug5391_mask = 0,
+ .rx_xoff_thresh = 54272, /* ~80Kb - 3*max MTU */
+ .rx_xon_thresh = 27648, /* ~3*max MTU */
+ .rx_buffer_padding = 0,
+ .max_interrupt_mode = EFX_INT_MODE_MSIX,
+ .phys_addr_channels = 32, /* Hardware limit is 64, but the legacy
+ * interrupt handler only supports 32
+ * channels */
+};
+
diff --git a/drivers/net/sfc/falcon.h b/drivers/net/sfc/falcon.h
new file mode 100644
index 00000000000..6117403b0c0
--- /dev/null
+++ b/drivers/net/sfc/falcon.h
@@ -0,0 +1,130 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2008 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_FALCON_H
+#define EFX_FALCON_H
+
+#include "net_driver.h"
+
+/*
+ * Falcon hardware control
+ */
+
+/* Falcon silicon revisions.
+ * NOTE(review): presumably these are the values FALCON_REV() is
+ * compared against - confirm against the callers. */
+enum falcon_revision {
+ FALCON_REV_A0 = 0,
+ FALCON_REV_A1 = 1,
+ FALCON_REV_B0 = 2,
+};
+
+/* Revision of a NIC, taken from the revision field of its PCI device */
+#define FALCON_REV(efx) ((efx)->pci_dev->revision)
+
+/* Per-revision NIC parameter tables (A-revision and B-revision values) */
+extern struct efx_nic_type falcon_a_nic_type;
+extern struct efx_nic_type falcon_b_nic_type;
+
+/**************************************************************************
+ *
+ * Externs
+ *
+ **************************************************************************
+ */
+
+/* TX data path */
+extern int falcon_probe_tx(struct efx_tx_queue *tx_queue);
+extern int falcon_init_tx(struct efx_tx_queue *tx_queue);
+extern void falcon_fini_tx(struct efx_tx_queue *tx_queue);
+extern void falcon_remove_tx(struct efx_tx_queue *tx_queue);
+extern void falcon_push_buffers(struct efx_tx_queue *tx_queue);
+
+/* RX data path */
+extern int falcon_probe_rx(struct efx_rx_queue *rx_queue);
+extern int falcon_init_rx(struct efx_rx_queue *rx_queue);
+extern void falcon_fini_rx(struct efx_rx_queue *rx_queue);
+extern void falcon_remove_rx(struct efx_rx_queue *rx_queue);
+extern void falcon_notify_rx_desc(struct efx_rx_queue *rx_queue);
+
+/* Event data path */
+extern int falcon_probe_eventq(struct efx_channel *channel);
+extern int falcon_init_eventq(struct efx_channel *channel);
+extern void falcon_fini_eventq(struct efx_channel *channel);
+extern void falcon_remove_eventq(struct efx_channel *channel);
+extern int falcon_process_eventq(struct efx_channel *channel, int *rx_quota);
+extern void falcon_eventq_read_ack(struct efx_channel *channel);
+
+/* Ports */
+extern int falcon_probe_port(struct efx_nic *efx);
+extern void falcon_remove_port(struct efx_nic *efx);
+
+/* MAC/PHY */
+extern int falcon_xaui_link_ok(struct efx_nic *efx);
+extern int falcon_dma_stats(struct efx_nic *efx,
+ unsigned int done_offset);
+extern void falcon_drain_tx_fifo(struct efx_nic *efx);
+extern void falcon_deconfigure_mac_wrapper(struct efx_nic *efx);
+extern void falcon_reconfigure_mac_wrapper(struct efx_nic *efx);
+
+/* Interrupts and test events */
+extern int falcon_init_interrupt(struct efx_nic *efx);
+extern void falcon_enable_interrupts(struct efx_nic *efx);
+extern void falcon_generate_test_event(struct efx_channel *channel,
+ unsigned int magic);
+extern void falcon_generate_interrupt(struct efx_nic *efx);
+extern void falcon_set_int_moderation(struct efx_channel *channel);
+extern void falcon_disable_interrupts(struct efx_nic *efx);
+extern void falcon_fini_interrupt(struct efx_nic *efx);
+
+/* Global Resources */
+extern int falcon_probe_nic(struct efx_nic *efx);
+extern int falcon_probe_resources(struct efx_nic *efx);
+extern int falcon_init_nic(struct efx_nic *efx);
+extern int falcon_reset_hw(struct efx_nic *efx, enum reset_type method);
+extern void falcon_remove_resources(struct efx_nic *efx);
+extern void falcon_remove_nic(struct efx_nic *efx);
+extern void falcon_update_nic_stats(struct efx_nic *efx);
+extern void falcon_set_multicast_hash(struct efx_nic *efx);
+extern int falcon_reset_xaui(struct efx_nic *efx);
+
+/**************************************************************************
+ *
+ * Falcon MAC stats
+ *
+ **************************************************************************
+ */
+
+/* Byte offset and bit width of a statistic, looked up via EFX_VAL().
+ * NOTE(review): EFX_VAL is defined outside this file; presumably it
+ * pastes the suffix onto the statistic name (<stat>_offset and
+ * <stat>_WIDTH) - confirm in bitfield.h. */
+#define FALCON_STAT_OFFSET(falcon_stat) EFX_VAL(falcon_stat, offset)
+#define FALCON_STAT_WIDTH(falcon_stat) EFX_VAL(falcon_stat, WIDTH)
+
+/* Retrieve statistic from statistics block
+ *
+ * Reads the little-endian counter found FALCON_STAT_OFFSET(falcon_stat)
+ * bytes into (efx)->stats_buffer.addr and adds it to
+ * (efx)->mac_stats.efx_stat. The counter is read as 16 or 32 bits when
+ * FALCON_STAT_WIDTH(falcon_stat) says so, and as 64 bits for any other
+ * width.
+ *
+ * Every use of the efx argument is parenthesised so that an expression
+ * argument binds correctly. The argument appears more than once in the
+ * expansion, so it must not have side effects.
+ */
+#define FALCON_STAT(efx, falcon_stat, efx_stat) do { \
+ if (FALCON_STAT_WIDTH(falcon_stat) == 16) \
+ (efx)->mac_stats.efx_stat += le16_to_cpu( \
+ *((__force __le16 *) \
+ ((efx)->stats_buffer.addr + \
+ FALCON_STAT_OFFSET(falcon_stat)))); \
+ else if (FALCON_STAT_WIDTH(falcon_stat) == 32) \
+ (efx)->mac_stats.efx_stat += le32_to_cpu( \
+ *((__force __le32 *) \
+ ((efx)->stats_buffer.addr + \
+ FALCON_STAT_OFFSET(falcon_stat)))); \
+ else \
+ (efx)->mac_stats.efx_stat += le64_to_cpu( \
+ *((__force __le64 *) \
+ ((efx)->stats_buffer.addr + \
+ FALCON_STAT_OFFSET(falcon_stat)))); \
+ } while (0)
+
+#define FALCON_MAC_STATS_SIZE 0x100
+
+#define MAC_DATA_LBN 0
+#define MAC_DATA_WIDTH 32
+
+extern void falcon_generate_event(struct efx_channel *channel,
+ efx_qword_t *event);
+
+#endif /* EFX_FALCON_H */
diff --git a/drivers/net/sfc/falcon_hwdefs.h b/drivers/net/sfc/falcon_hwdefs.h
new file mode 100644
index 00000000000..0485a63eaff
--- /dev/null
+++ b/drivers/net/sfc/falcon_hwdefs.h
@@ -0,0 +1,1135 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2008 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_FALCON_HWDEFS_H
+#define EFX_FALCON_HWDEFS_H
+
+/*
+ * Falcon hardware value definitions.
+ * Falcon is the internal codename for the SFC4000 controller that is
+ * present in SFE400X evaluation boards
+ */
+
+/**************************************************************************
+ *
+ * Falcon registers
+ *
+ **************************************************************************
+ */
+
+/* Address region register */
+#define ADR_REGION_REG_KER 0x00
+#define ADR_REGION0_LBN 0
+#define ADR_REGION0_WIDTH 18
+#define ADR_REGION1_LBN 32
+#define ADR_REGION1_WIDTH 18
+#define ADR_REGION2_LBN 64
+#define ADR_REGION2_WIDTH 18
+#define ADR_REGION3_LBN 96
+#define ADR_REGION3_WIDTH 18
+
+/* Interrupt enable register */
+#define INT_EN_REG_KER 0x0010
+#define KER_INT_KER_LBN 3
+#define KER_INT_KER_WIDTH 1
+#define DRV_INT_EN_KER_LBN 0
+#define DRV_INT_EN_KER_WIDTH 1
+
+/* Interrupt status address register */
+#define INT_ADR_REG_KER 0x0030
+#define NORM_INT_VEC_DIS_KER_LBN 64
+#define NORM_INT_VEC_DIS_KER_WIDTH 1
+#define INT_ADR_KER_LBN 0
+#define INT_ADR_KER_WIDTH EFX_DMA_TYPE_WIDTH(64) /* not 46 for this one */
+
+/* Interrupt status register (B0 only) */
+#define INT_ISR0_B0 0x90
+#define INT_ISR1_B0 0xA0
+
+/* Interrupt acknowledge register (A0/A1 only) */
+#define INT_ACK_REG_KER_A1 0x0050
+#define INT_ACK_DUMMY_DATA_LBN 0
+#define INT_ACK_DUMMY_DATA_WIDTH 32
+
+/* Interrupt acknowledge work-around register (A0/A1 only) */
+#define WORK_AROUND_BROKEN_PCI_READS_REG_KER_A1 0x0070
+
+/* SPI host command register */
+#define EE_SPI_HCMD_REG_KER 0x0100
+#define EE_SPI_HCMD_CMD_EN_LBN 31
+#define EE_SPI_HCMD_CMD_EN_WIDTH 1
+#define EE_WR_TIMER_ACTIVE_LBN 28
+#define EE_WR_TIMER_ACTIVE_WIDTH 1
+#define EE_SPI_HCMD_SF_SEL_LBN 24
+#define EE_SPI_HCMD_SF_SEL_WIDTH 1
+#define EE_SPI_EEPROM 0
+#define EE_SPI_FLASH 1
+#define EE_SPI_HCMD_DABCNT_LBN 16
+#define EE_SPI_HCMD_DABCNT_WIDTH 5
+#define EE_SPI_HCMD_READ_LBN 15
+#define EE_SPI_HCMD_READ_WIDTH 1
+#define EE_SPI_READ 1
+#define EE_SPI_WRITE 0
+#define EE_SPI_HCMD_DUBCNT_LBN 12
+#define EE_SPI_HCMD_DUBCNT_WIDTH 2
+#define EE_SPI_HCMD_ADBCNT_LBN 8
+#define EE_SPI_HCMD_ADBCNT_WIDTH 2
+#define EE_SPI_HCMD_ENC_LBN 0
+#define EE_SPI_HCMD_ENC_WIDTH 8
+
+/* SPI host address register */
+#define EE_SPI_HADR_REG_KER 0x0110
+#define EE_SPI_HADR_ADR_LBN 0
+#define EE_SPI_HADR_ADR_WIDTH 24
+
+/* SPI host data register */
+#define EE_SPI_HDATA_REG_KER 0x0120
+
+/* PCIE CORE ACCESS REG */
+#define PCIE_CORE_ADDR_PCIE_DEVICE_CTRL_STAT 0x68
+#define PCIE_CORE_ADDR_PCIE_LINK_CTRL_STAT 0x70
+#define PCIE_CORE_ADDR_ACK_RPL_TIMER 0x700
+#define PCIE_CORE_ADDR_ACK_FREQ 0x70C
+
+/* NIC status register */
+#define NIC_STAT_REG 0x0200
+#define ONCHIP_SRAM_LBN 16
+#define ONCHIP_SRAM_WIDTH 1
+#define SF_PRST_LBN 9
+#define SF_PRST_WIDTH 1
+#define EE_PRST_LBN 8
+#define EE_PRST_WIDTH 1
+/* See pic_mode_t for decoding of this field */
+/* These bit definitions are extrapolated from the list of numerical
+ * values for STRAP_PINS.
+ */
+#define STRAP_10G_LBN 2
+#define STRAP_10G_WIDTH 1
+#define STRAP_PCIE_LBN 0
+#define STRAP_PCIE_WIDTH 1
+
+/* GPIO control register
+ *
+ * Three groups of per-pin bits, each starting at its own base bit
+ * number: inputs at GPIO_INPUTS_LBN, outputs at GPIO_OUTPUTS_LBN and
+ * direction (output enable) at GPIO_DIRECTION_LBN. Pin n sits at
+ * base + n in each group.
+ */
+#define GPIO_CTL_REG_KER 0x0210
+#define GPIO_OUTPUTS_LBN (16)
+#define GPIO_OUTPUTS_WIDTH (4)
+#define GPIO_INPUTS_LBN (8)
+#define GPIO_DIRECTION_LBN (24)
+#define GPIO_DIRECTION_WIDTH (4)
+#define GPIO_DIRECTION_OUT (1)
+#define GPIO_SRAM_SLEEP (1 << 1)
+
+/* Per-pin output-enable bits */
+#define GPIO3_OEN_LBN (GPIO_DIRECTION_LBN + 3)
+#define GPIO3_OEN_WIDTH 1
+#define GPIO2_OEN_LBN (GPIO_DIRECTION_LBN + 2)
+#define GPIO2_OEN_WIDTH 1
+#define GPIO1_OEN_LBN (GPIO_DIRECTION_LBN + 1)
+#define GPIO1_OEN_WIDTH 1
+#define GPIO0_OEN_LBN (GPIO_DIRECTION_LBN + 0)
+#define GPIO0_OEN_WIDTH 1
+
+/* Per-pin output value bits */
+#define GPIO3_OUT_LBN (GPIO_OUTPUTS_LBN + 3)
+#define GPIO3_OUT_WIDTH 1
+#define GPIO2_OUT_LBN (GPIO_OUTPUTS_LBN + 2)
+#define GPIO2_OUT_WIDTH 1
+#define GPIO1_OUT_LBN (GPIO_OUTPUTS_LBN + 1)
+#define GPIO1_OUT_WIDTH 1
+#define GPIO0_OUT_LBN (GPIO_OUTPUTS_LBN + 0)
+#define GPIO0_OUT_WIDTH 1
+
+/* Per-pin input value bits. GPIO2_IN_LBN and GPIO1_IN_LBN follow the
+ * same base + n layout as the OEN and OUT groups, so that each
+ * GPIOn_IN_WIDTH has a matching GPIOn_IN_LBN. */
+#define GPIO3_IN_LBN (GPIO_INPUTS_LBN + 3)
+#define GPIO3_IN_WIDTH 1
+#define GPIO2_IN_LBN (GPIO_INPUTS_LBN + 2)
+#define GPIO2_IN_WIDTH 1
+#define GPIO1_IN_LBN (GPIO_INPUTS_LBN + 1)
+#define GPIO1_IN_WIDTH 1
+#define GPIO0_IN_LBN (GPIO_INPUTS_LBN + 0)
+#define GPIO0_IN_WIDTH 1
+
+/* Global control register */
+#define GLB_CTL_REG_KER 0x0220
+#define EXT_PHY_RST_CTL_LBN 63
+#define EXT_PHY_RST_CTL_WIDTH 1
+#define PCIE_SD_RST_CTL_LBN 61
+#define PCIE_SD_RST_CTL_WIDTH 1
+
+#define PCIE_NSTCK_RST_CTL_LBN 58
+#define PCIE_NSTCK_RST_CTL_WIDTH 1
+#define PCIE_CORE_RST_CTL_LBN 57
+#define PCIE_CORE_RST_CTL_WIDTH 1
+#define EE_RST_CTL_LBN 49
+#define EE_RST_CTL_WIDTH 1
+#define RST_XGRX_LBN 24
+#define RST_XGRX_WIDTH 1
+#define RST_XGTX_LBN 23
+#define RST_XGTX_WIDTH 1
+#define RST_EM_LBN 22
+#define RST_EM_WIDTH 1
+#define EXT_PHY_RST_DUR_LBN 1
+#define EXT_PHY_RST_DUR_WIDTH 3
+#define SWRST_LBN 0
+#define SWRST_WIDTH 1
+#define INCLUDE_IN_RESET 0
+#define EXCLUDE_FROM_RESET 1
+
+/* Fatal interrupt register */
+#define FATAL_INTR_REG_KER 0x0230
+#define RBUF_OWN_INT_KER_EN_LBN 39
+#define RBUF_OWN_INT_KER_EN_WIDTH 1
+#define TBUF_OWN_INT_KER_EN_LBN 38
+#define TBUF_OWN_INT_KER_EN_WIDTH 1
+#define ILL_ADR_INT_KER_EN_LBN 33
+#define ILL_ADR_INT_KER_EN_WIDTH 1
+#define MEM_PERR_INT_KER_LBN 8
+#define MEM_PERR_INT_KER_WIDTH 1
+#define INT_KER_ERROR_LBN 0
+#define INT_KER_ERROR_WIDTH 12
+
+#define DP_CTRL_REG 0x250
+#define FLS_EVQ_ID_LBN 0
+#define FLS_EVQ_ID_WIDTH 11
+
+#define MEM_STAT_REG_KER 0x260
+
+/* Debug probe register */
+#define DEBUG_BLK_SEL_MISC 7
+#define DEBUG_BLK_SEL_SERDES 6
+#define DEBUG_BLK_SEL_EM 5
+#define DEBUG_BLK_SEL_SR 4
+#define DEBUG_BLK_SEL_EV 3
+#define DEBUG_BLK_SEL_RX 2
+#define DEBUG_BLK_SEL_TX 1
+#define DEBUG_BLK_SEL_BIU 0
+
+/* FPGA build version */
+#define ALTERA_BUILD_REG_KER 0x0300
+#define VER_ALL_LBN 0
+#define VER_ALL_WIDTH 32
+
+/* Spare EEPROM bits register (flash 0x390) */
+#define SPARE_REG_KER 0x310
+#define MEM_PERR_EN_TX_DATA_LBN 72
+#define MEM_PERR_EN_TX_DATA_WIDTH 2
+
+/* Timer table for kernel access */
+#define TIMER_CMD_REG_KER 0x420
+#define TIMER_MODE_LBN 12
+#define TIMER_MODE_WIDTH 2
+#define TIMER_MODE_DIS 0
+#define TIMER_MODE_INT_HLDOFF 2
+#define TIMER_VAL_LBN 0
+#define TIMER_VAL_WIDTH 12
+
+/* Driver generated event register */
+#define DRV_EV_REG_KER 0x440
+#define DRV_EV_QID_LBN 64
+#define DRV_EV_QID_WIDTH 12
+#define DRV_EV_DATA_LBN 0
+#define DRV_EV_DATA_WIDTH 64
+
+/* Buffer table configuration register */
+#define BUF_TBL_CFG_REG_KER 0x600
+#define BUF_TBL_MODE_LBN 3
+#define BUF_TBL_MODE_WIDTH 1
+#define BUF_TBL_MODE_HALF 0
+#define BUF_TBL_MODE_FULL 1
+
+/* SRAM receive descriptor cache configuration register */
+#define SRM_RX_DC_CFG_REG_KER 0x610
+#define SRM_RX_DC_BASE_ADR_LBN 0
+#define SRM_RX_DC_BASE_ADR_WIDTH 21
+
+/* SRAM transmit descriptor cache configuration register */
+#define SRM_TX_DC_CFG_REG_KER 0x620
+#define SRM_TX_DC_BASE_ADR_LBN 0
+#define SRM_TX_DC_BASE_ADR_WIDTH 21
+
+/* SRAM configuration register */
+#define SRM_CFG_REG_KER 0x630
+#define SRAM_OOB_BT_INIT_EN_LBN 3
+#define SRAM_OOB_BT_INIT_EN_WIDTH 1
+#define SRM_NUM_BANKS_AND_BANK_SIZE_LBN 0
+#define SRM_NUM_BANKS_AND_BANK_SIZE_WIDTH 3
+#define SRM_NB_BSZ_1BANKS_2M 0
+#define SRM_NB_BSZ_1BANKS_4M 1
+#define SRM_NB_BSZ_1BANKS_8M 2
+#define SRM_NB_BSZ_DEFAULT 3 /* char driver will set the default */
+#define SRM_NB_BSZ_2BANKS_4M 4
+#define SRM_NB_BSZ_2BANKS_8M 5
+#define SRM_NB_BSZ_2BANKS_16M 6
+#define SRM_NB_BSZ_RESERVED 7
+
+/* Special buffer table update register */
+#define BUF_TBL_UPD_REG_KER 0x0650
+#define BUF_UPD_CMD_LBN 63
+#define BUF_UPD_CMD_WIDTH 1
+#define BUF_CLR_CMD_LBN 62
+#define BUF_CLR_CMD_WIDTH 1
+#define BUF_CLR_END_ID_LBN 32
+#define BUF_CLR_END_ID_WIDTH 20
+#define BUF_CLR_START_ID_LBN 0
+#define BUF_CLR_START_ID_WIDTH 20
+
+/* Receive configuration register */
+#define RX_CFG_REG_KER 0x800
+
+/* B0 */
+#define RX_INGR_EN_B0_LBN 47
+#define RX_INGR_EN_B0_WIDTH 1
+#define RX_DESC_PUSH_EN_B0_LBN 43
+#define RX_DESC_PUSH_EN_B0_WIDTH 1
+#define RX_XON_TX_TH_B0_LBN 33
+#define RX_XON_TX_TH_B0_WIDTH 5
+#define RX_XOFF_TX_TH_B0_LBN 28
+#define RX_XOFF_TX_TH_B0_WIDTH 5
+#define RX_USR_BUF_SIZE_B0_LBN 19
+#define RX_USR_BUF_SIZE_B0_WIDTH 9
+#define RX_XON_MAC_TH_B0_LBN 10
+#define RX_XON_MAC_TH_B0_WIDTH 9
+#define RX_XOFF_MAC_TH_B0_LBN 1
+#define RX_XOFF_MAC_TH_B0_WIDTH 9
+#define RX_XOFF_MAC_EN_B0_LBN 0
+#define RX_XOFF_MAC_EN_B0_WIDTH 1
+
+/* A1 */
+#define RX_DESC_PUSH_EN_A1_LBN 35
+#define RX_DESC_PUSH_EN_A1_WIDTH 1
+#define RX_XON_TX_TH_A1_LBN 25
+#define RX_XON_TX_TH_A1_WIDTH 5
+#define RX_XOFF_TX_TH_A1_LBN 20
+#define RX_XOFF_TX_TH_A1_WIDTH 5
+#define RX_USR_BUF_SIZE_A1_LBN 11
+#define RX_USR_BUF_SIZE_A1_WIDTH 9
+#define RX_XON_MAC_TH_A1_LBN 6
+#define RX_XON_MAC_TH_A1_WIDTH 5
+#define RX_XOFF_MAC_TH_A1_LBN 1
+#define RX_XOFF_MAC_TH_A1_WIDTH 5
+#define RX_XOFF_MAC_EN_A1_LBN 0
+#define RX_XOFF_MAC_EN_A1_WIDTH 1
+
+/* Receive filter control register */
+#define RX_FILTER_CTL_REG 0x810
+#define UDP_FULL_SRCH_LIMIT_LBN 32
+#define UDP_FULL_SRCH_LIMIT_WIDTH 8
+#define NUM_KER_LBN 24
+#define NUM_KER_WIDTH 2
+#define UDP_WILD_SRCH_LIMIT_LBN 16
+#define UDP_WILD_SRCH_LIMIT_WIDTH 8
+#define TCP_WILD_SRCH_LIMIT_LBN 8
+#define TCP_WILD_SRCH_LIMIT_WIDTH 8
+#define TCP_FULL_SRCH_LIMIT_LBN 0
+#define TCP_FULL_SRCH_LIMIT_WIDTH 8
+
+/* RX queue flush register */
+#define RX_FLUSH_DESCQ_REG_KER 0x0820
+#define RX_FLUSH_DESCQ_CMD_LBN 24
+#define RX_FLUSH_DESCQ_CMD_WIDTH 1
+#define RX_FLUSH_DESCQ_LBN 0
+#define RX_FLUSH_DESCQ_WIDTH 12
+
+/* Receive descriptor update register */
+#define RX_DESC_UPD_REG_KER_DWORD (0x830 + 12)
+#define RX_DESC_WPTR_DWORD_LBN 0
+#define RX_DESC_WPTR_DWORD_WIDTH 12
+
+/* Receive descriptor cache configuration register */
+#define RX_DC_CFG_REG_KER 0x840
+#define RX_DC_SIZE_LBN 0
+#define RX_DC_SIZE_WIDTH 2
+
+#define RX_DC_PF_WM_REG_KER 0x850
+#define RX_DC_PF_LWM_LBN 0
+#define RX_DC_PF_LWM_WIDTH 6
+
+/* RX no descriptor drop counter */
+#define RX_NODESC_DROP_REG_KER 0x880
+#define RX_NODESC_DROP_CNT_LBN 0
+#define RX_NODESC_DROP_CNT_WIDTH 16
+
+/* RX black magic register */
+#define RX_SELF_RST_REG_KER 0x890
+#define RX_ISCSI_DIS_LBN 17
+#define RX_ISCSI_DIS_WIDTH 1
+#define RX_NODESC_WAIT_DIS_LBN 9
+#define RX_NODESC_WAIT_DIS_WIDTH 1
+#define RX_RECOVERY_EN_LBN 8
+#define RX_RECOVERY_EN_WIDTH 1
+
+/* TX queue flush register */
+#define TX_FLUSH_DESCQ_REG_KER 0x0a00
+#define TX_FLUSH_DESCQ_CMD_LBN 12
+#define TX_FLUSH_DESCQ_CMD_WIDTH 1
+#define TX_FLUSH_DESCQ_LBN 0
+#define TX_FLUSH_DESCQ_WIDTH 12
+
+/* Transmit descriptor update register */
+#define TX_DESC_UPD_REG_KER_DWORD (0xa10 + 12)
+#define TX_DESC_WPTR_DWORD_LBN 0
+#define TX_DESC_WPTR_DWORD_WIDTH 12
+
+/* Transmit descriptor cache configuration register */
+#define TX_DC_CFG_REG_KER 0xa20
+#define TX_DC_SIZE_LBN 0
+#define TX_DC_SIZE_WIDTH 2
+
+/* Transmit checksum configuration register (A0/A1 only) */
+#define TX_CHKSM_CFG_REG_KER_A1 0xa30
+
+/* Transmit configuration register */
+#define TX_CFG_REG_KER 0xa50
+#define TX_NO_EOP_DISC_EN_LBN 5
+#define TX_NO_EOP_DISC_EN_WIDTH 1
+
+/* Transmit configuration register 2 */
+#define TX_CFG2_REG_KER 0xa80
+#define TX_CSR_PUSH_EN_LBN 89
+#define TX_CSR_PUSH_EN_WIDTH 1
+#define TX_RX_SPACER_LBN 64
+#define TX_RX_SPACER_WIDTH 8
+#define TX_SW_EV_EN_LBN 59
+#define TX_SW_EV_EN_WIDTH 1
+#define TX_RX_SPACER_EN_LBN 57
+#define TX_RX_SPACER_EN_WIDTH 1
+#define TX_PREF_THRESHOLD_LBN 19
+#define TX_PREF_THRESHOLD_WIDTH 2
+#define TX_ONE_PKT_PER_Q_LBN 18
+#define TX_ONE_PKT_PER_Q_WIDTH 1
+#define TX_DIS_NON_IP_EV_LBN 17
+#define TX_DIS_NON_IP_EV_WIDTH 1
+#define TX_FLUSH_MIN_LEN_EN_B0_LBN 7
+#define TX_FLUSH_MIN_LEN_EN_B0_WIDTH 1
+
+/* PHY management transmit data register */
+#define MD_TXD_REG_KER 0xc00
+#define MD_TXD_LBN 0
+#define MD_TXD_WIDTH 16
+
+/* PHY management receive data register */
+#define MD_RXD_REG_KER 0xc10
+#define MD_RXD_LBN 0
+#define MD_RXD_WIDTH 16
+
+/* PHY management configuration & status register */
+#define MD_CS_REG_KER 0xc20
+#define MD_GC_LBN 4
+#define MD_GC_WIDTH 1
+#define MD_RIC_LBN 2
+#define MD_RIC_WIDTH 1
+#define MD_RDC_LBN 1
+#define MD_RDC_WIDTH 1
+#define MD_WRC_LBN 0
+#define MD_WRC_WIDTH 1
+
+/* PHY management PHY address register */
+#define MD_PHY_ADR_REG_KER 0xc30
+#define MD_PHY_ADR_LBN 0
+#define MD_PHY_ADR_WIDTH 16
+
+/* PHY management ID register */
+#define MD_ID_REG_KER 0xc40
+#define MD_PRT_ADR_LBN 11
+#define MD_PRT_ADR_WIDTH 5
+#define MD_DEV_ADR_LBN 6
+#define MD_DEV_ADR_WIDTH 5
+/* Used for writing both at once */
+#define MD_PRT_DEV_ADR_LBN 6
+#define MD_PRT_DEV_ADR_WIDTH 10
+
+/* PHY management status & mask register (DWORD read only) */
+#define MD_STAT_REG_KER 0xc50
+#define MD_BSERR_LBN 2
+#define MD_BSERR_WIDTH 1
+#define MD_LNFL_LBN 1
+#define MD_LNFL_WIDTH 1
+#define MD_BSY_LBN 0
+#define MD_BSY_WIDTH 1
+
+/* Port 0 and 1 MAC stats registers */
+#define MAC0_STAT_DMA_REG_KER 0xc60
+#define MAC_STAT_DMA_CMD_LBN 48
+#define MAC_STAT_DMA_CMD_WIDTH 1
+#define MAC_STAT_DMA_ADR_LBN 0
+#define MAC_STAT_DMA_ADR_WIDTH EFX_DMA_TYPE_WIDTH(46)
+
+/* Port 0 and 1 MAC control registers */
+#define MAC0_CTRL_REG_KER 0xc80
+#define MAC_XOFF_VAL_LBN 16
+#define MAC_XOFF_VAL_WIDTH 16
+#define TXFIFO_DRAIN_EN_B0_LBN 7
+#define TXFIFO_DRAIN_EN_B0_WIDTH 1
+#define MAC_BCAD_ACPT_LBN 4
+#define MAC_BCAD_ACPT_WIDTH 1
+#define MAC_UC_PROM_LBN 3
+#define MAC_UC_PROM_WIDTH 1
+#define MAC_LINK_STATUS_LBN 2
+#define MAC_LINK_STATUS_WIDTH 1
+#define MAC_SPEED_LBN 0
+#define MAC_SPEED_WIDTH 2
+
+/* 10G XAUI XGXS default values */
+#define XX_TXDRV_DEQ_DEFAULT 0xe /* deq=.6 */
+#define XX_TXDRV_DTX_DEFAULT 0x5 /* 1.25 */
+#define XX_SD_CTL_DRV_DEFAULT 0 /* 20mA */
+
+/* Multicast address hash table */
+#define MAC_MCAST_HASH_REG0_KER 0xca0
+#define MAC_MCAST_HASH_REG1_KER 0xcb0
+
+/* GMAC registers */
+#define FALCON_GMAC_REGBANK 0xe00
+#define FALCON_GMAC_REGBANK_SIZE 0x200
+#define FALCON_GMAC_REG_SIZE 0x10
+
+/* XMAC registers */
+#define FALCON_XMAC_REGBANK 0x1200
+#define FALCON_XMAC_REGBANK_SIZE 0x200
+#define FALCON_XMAC_REG_SIZE 0x10
+
+/* XGMAC address register low */
+#define XM_ADR_LO_REG_MAC 0x00
+#define XM_ADR_3_LBN 24
+#define XM_ADR_3_WIDTH 8
+#define XM_ADR_2_LBN 16
+#define XM_ADR_2_WIDTH 8
+#define XM_ADR_1_LBN 8
+#define XM_ADR_1_WIDTH 8
+#define XM_ADR_0_LBN 0
+#define XM_ADR_0_WIDTH 8
+
+/* XGMAC address register high */
+#define XM_ADR_HI_REG_MAC 0x01
+#define XM_ADR_5_LBN 8
+#define XM_ADR_5_WIDTH 8
+#define XM_ADR_4_LBN 0
+#define XM_ADR_4_WIDTH 8
+
+/* XGMAC global configuration */
+#define XM_GLB_CFG_REG_MAC 0x02
+#define XM_RX_STAT_EN_LBN 11
+#define XM_RX_STAT_EN_WIDTH 1
+#define XM_TX_STAT_EN_LBN 10
+#define XM_TX_STAT_EN_WIDTH 1
+#define XM_RX_JUMBO_MODE_LBN 6
+#define XM_RX_JUMBO_MODE_WIDTH 1
+#define XM_INTCLR_MODE_LBN 3
+#define XM_INTCLR_MODE_WIDTH 1
+#define XM_CORE_RST_LBN 0
+#define XM_CORE_RST_WIDTH 1
+
+/* XGMAC transmit configuration */
+#define XM_TX_CFG_REG_MAC 0x03
+#define XM_IPG_LBN 16
+#define XM_IPG_WIDTH 4
+#define XM_FCNTL_LBN 10
+#define XM_FCNTL_WIDTH 1
+#define XM_TXCRC_LBN 8
+#define XM_TXCRC_WIDTH 1
+#define XM_AUTO_PAD_LBN 5
+#define XM_AUTO_PAD_WIDTH 1
+#define XM_TX_PRMBL_LBN 2
+#define XM_TX_PRMBL_WIDTH 1
+#define XM_TXEN_LBN 1
+#define XM_TXEN_WIDTH 1
+
+/* XGMAC receive configuration */
+#define XM_RX_CFG_REG_MAC 0x04
+#define XM_PASS_CRC_ERR_LBN 25
+#define XM_PASS_CRC_ERR_WIDTH 1
+#define XM_ACPT_ALL_MCAST_LBN 11
+#define XM_ACPT_ALL_MCAST_WIDTH 1
+#define XM_ACPT_ALL_UCAST_LBN 9
+#define XM_ACPT_ALL_UCAST_WIDTH 1
+#define XM_AUTO_DEPAD_LBN 8
+#define XM_AUTO_DEPAD_WIDTH 1
+#define XM_RXEN_LBN 1
+#define XM_RXEN_WIDTH 1
+
+/* XGMAC management interrupt mask register */
+#define XM_MGT_INT_MSK_REG_MAC_B0 0x5
+#define XM_MSK_PRMBLE_ERR_LBN 2
+#define XM_MSK_PRMBLE_ERR_WIDTH 1
+#define XM_MSK_RMTFLT_LBN 1
+#define XM_MSK_RMTFLT_WIDTH 1
+#define XM_MSK_LCLFLT_LBN 0
+#define XM_MSK_LCLFLT_WIDTH 1
+
+/* XGMAC flow control register */
+#define XM_FC_REG_MAC 0x7
+#define XM_PAUSE_TIME_LBN 16
+#define XM_PAUSE_TIME_WIDTH 16
+#define XM_DIS_FCNTL_LBN 0
+#define XM_DIS_FCNTL_WIDTH 1
+
+/* XGMAC pause time count register */
+#define XM_PAUSE_TIME_REG_MAC 0x9
+
+/* XGMAC transmit parameter register */
+#define XM_TX_PARAM_REG_MAC 0x0d
+#define XM_TX_JUMBO_MODE_LBN 31
+#define XM_TX_JUMBO_MODE_WIDTH 1
+#define XM_MAX_TX_FRM_SIZE_LBN 16
+#define XM_MAX_TX_FRM_SIZE_WIDTH 14
+
+/* XGMAC receive parameter register */
+#define XM_RX_PARAM_REG_MAC 0x0e
+#define XM_MAX_RX_FRM_SIZE_LBN 0
+#define XM_MAX_RX_FRM_SIZE_WIDTH 14
+
+/* XGMAC management interrupt status register */
+#define XM_MGT_INT_REG_MAC_B0 0x0f
+/* PRMBLE_ERR field, named with the _LBN/_WIDTH suffixes used by the
+ * other fields of this register and by XM_MSK_PRMBLE_ERR_LBN/_WIDTH */
+#define XM_PRMBLE_ERR_LBN 2
+#define XM_PRMBLE_ERR_WIDTH 1
+/* Original spellings of the two definitions above, kept so that any
+ * existing user keeps compiling */
+#define XM_PRMBLE_ERR XM_PRMBLE_ERR_LBN
+#define XM_PRMBLE_WIDTH XM_PRMBLE_ERR_WIDTH
+#define XM_RMTFLT_LBN 1
+#define XM_RMTFLT_WIDTH 1
+#define XM_LCLFLT_LBN 0
+#define XM_LCLFLT_WIDTH 1
+
+/* XGXS/XAUI powerdown/reset register */
+#define XX_PWR_RST_REG_MAC 0x10
+
+#define XX_PWRDND_EN_LBN 15
+#define XX_PWRDND_EN_WIDTH 1
+#define XX_PWRDNC_EN_LBN 14
+#define XX_PWRDNC_EN_WIDTH 1
+#define XX_PWRDNB_EN_LBN 13
+#define XX_PWRDNB_EN_WIDTH 1
+#define XX_PWRDNA_EN_LBN 12
+#define XX_PWRDNA_EN_WIDTH 1
+#define XX_RSTPLLCD_EN_LBN 9
+#define XX_RSTPLLCD_EN_WIDTH 1
+#define XX_RSTPLLAB_EN_LBN 8
+#define XX_RSTPLLAB_EN_WIDTH 1
+#define XX_RESETD_EN_LBN 7
+#define XX_RESETD_EN_WIDTH 1
+#define XX_RESETC_EN_LBN 6
+#define XX_RESETC_EN_WIDTH 1
+#define XX_RESETB_EN_LBN 5
+#define XX_RESETB_EN_WIDTH 1
+#define XX_RESETA_EN_LBN 4
+#define XX_RESETA_EN_WIDTH 1
+#define XX_RSTXGXSRX_EN_LBN 2
+#define XX_RSTXGXSRX_EN_WIDTH 1
+#define XX_RSTXGXSTX_EN_LBN 1
+#define XX_RSTXGXSTX_EN_WIDTH 1
+#define XX_RST_XX_EN_LBN 0
+#define XX_RST_XX_EN_WIDTH 1
+
+/* XGXS/XAUI powerdown/reset control register */
+#define XX_SD_CTL_REG_MAC 0x11
+#define XX_HIDRVD_LBN 15
+#define XX_HIDRVD_WIDTH 1
+#define XX_LODRVD_LBN 14
+#define XX_LODRVD_WIDTH 1
+#define XX_HIDRVC_LBN 13
+#define XX_HIDRVC_WIDTH 1
+#define XX_LODRVC_LBN 12
+#define XX_LODRVC_WIDTH 1
+#define XX_HIDRVB_LBN 11
+#define XX_HIDRVB_WIDTH 1
+#define XX_LODRVB_LBN 10
+#define XX_LODRVB_WIDTH 1
+#define XX_HIDRVA_LBN 9
+#define XX_HIDRVA_WIDTH 1
+#define XX_LODRVA_LBN 8
+#define XX_LODRVA_WIDTH 1
+
+#define XX_TXDRV_CTL_REG_MAC 0x12
+#define XX_DEQD_LBN 28
+#define XX_DEQD_WIDTH 4
+#define XX_DEQC_LBN 24
+#define XX_DEQC_WIDTH 4
+#define XX_DEQB_LBN 20
+#define XX_DEQB_WIDTH 4
+#define XX_DEQA_LBN 16
+#define XX_DEQA_WIDTH 4
+#define XX_DTXD_LBN 12
+#define XX_DTXD_WIDTH 4
+#define XX_DTXC_LBN 8
+#define XX_DTXC_WIDTH 4
+#define XX_DTXB_LBN 4
+#define XX_DTXB_WIDTH 4
+#define XX_DTXA_LBN 0
+#define XX_DTXA_WIDTH 4
+
+/* XAUI XGXS core status register */
+#define XX_FORCE_SIG_DECODE_FORCED 0xff
+#define XX_CORE_STAT_REG_MAC 0x16
+#define XX_ALIGN_DONE_LBN 20
+#define XX_ALIGN_DONE_WIDTH 1
+#define XX_SYNC_STAT_LBN 16
+#define XX_SYNC_STAT_WIDTH 4
+#define XX_SYNC_STAT_DECODE_SYNCED 0xf
+#define XX_COMMA_DET_LBN 12
+#define XX_COMMA_DET_WIDTH 4
+#define XX_COMMA_DET_DECODE_DETECTED 0xf
+#define XX_COMMA_DET_RESET 0xf
+#define XX_CHARERR_LBN 4
+#define XX_CHARERR_WIDTH 4
+#define XX_CHARERR_RESET 0xf
+#define XX_DISPERR_LBN 0
+#define XX_DISPERR_WIDTH 4
+#define XX_DISPERR_RESET 0xf
+
+/* Receive filter table */
+#define RX_FILTER_TBL0 0xF00000
+
+/* Receive descriptor pointer table */
+#define RX_DESC_PTR_TBL_KER_A1 0x11800
+#define RX_DESC_PTR_TBL_KER_B0 0xF40000
+#define RX_DESC_PTR_TBL_KER_P0 0x900
+#define RX_ISCSI_DDIG_EN_LBN 88
+#define RX_ISCSI_DDIG_EN_WIDTH 1
+#define RX_ISCSI_HDIG_EN_LBN 87
+#define RX_ISCSI_HDIG_EN_WIDTH 1
+#define RX_DESCQ_BUF_BASE_ID_LBN 36
+#define RX_DESCQ_BUF_BASE_ID_WIDTH 20
+#define RX_DESCQ_EVQ_ID_LBN 24
+#define RX_DESCQ_EVQ_ID_WIDTH 12
+#define RX_DESCQ_OWNER_ID_LBN 10
+#define RX_DESCQ_OWNER_ID_WIDTH 14
+#define RX_DESCQ_LABEL_LBN 5
+#define RX_DESCQ_LABEL_WIDTH 5
+#define RX_DESCQ_SIZE_LBN 3
+#define RX_DESCQ_SIZE_WIDTH 2
+#define RX_DESCQ_SIZE_4K 3
+#define RX_DESCQ_SIZE_2K 2
+#define RX_DESCQ_SIZE_1K 1
+#define RX_DESCQ_SIZE_512 0
+#define RX_DESCQ_TYPE_LBN 2
+#define RX_DESCQ_TYPE_WIDTH 1
+#define RX_DESCQ_JUMBO_LBN 1
+#define RX_DESCQ_JUMBO_WIDTH 1
+#define RX_DESCQ_EN_LBN 0
+#define RX_DESCQ_EN_WIDTH 1
+
+/* Transmit descriptor pointer table */
+#define TX_DESC_PTR_TBL_KER_A1 0x11900
+#define TX_DESC_PTR_TBL_KER_B0 0xF50000
+#define TX_DESC_PTR_TBL_KER_P0 0xa40
+#define TX_NON_IP_DROP_DIS_B0_LBN 91
+#define TX_NON_IP_DROP_DIS_B0_WIDTH 1
+#define TX_IP_CHKSM_DIS_B0_LBN 90
+#define TX_IP_CHKSM_DIS_B0_WIDTH 1
+#define TX_TCP_CHKSM_DIS_B0_LBN 89
+#define TX_TCP_CHKSM_DIS_B0_WIDTH 1
+#define TX_DESCQ_EN_LBN 88
+#define TX_DESCQ_EN_WIDTH 1
+#define TX_ISCSI_DDIG_EN_LBN 87
+#define TX_ISCSI_DDIG_EN_WIDTH 1
+#define TX_ISCSI_HDIG_EN_LBN 86
+#define TX_ISCSI_HDIG_EN_WIDTH 1
+#define TX_DESCQ_BUF_BASE_ID_LBN 36
+#define TX_DESCQ_BUF_BASE_ID_WIDTH 20
+#define TX_DESCQ_EVQ_ID_LBN 24
+#define TX_DESCQ_EVQ_ID_WIDTH 12
+#define TX_DESCQ_OWNER_ID_LBN 10
+#define TX_DESCQ_OWNER_ID_WIDTH 14
+#define TX_DESCQ_LABEL_LBN 5
+#define TX_DESCQ_LABEL_WIDTH 5
+#define TX_DESCQ_SIZE_LBN 3
+#define TX_DESCQ_SIZE_WIDTH 2
+#define TX_DESCQ_SIZE_4K 3
+#define TX_DESCQ_SIZE_2K 2
+#define TX_DESCQ_SIZE_1K 1
+#define TX_DESCQ_SIZE_512 0
+#define TX_DESCQ_TYPE_LBN 1
+#define TX_DESCQ_TYPE_WIDTH 2
+
+/* Event queue pointer */
+#define EVQ_PTR_TBL_KER_A1 0x11a00
+#define EVQ_PTR_TBL_KER_B0 0xf60000
+#define EVQ_PTR_TBL_KER_P0 0x500
+#define EVQ_EN_LBN 23
+#define EVQ_EN_WIDTH 1
+#define EVQ_SIZE_LBN 20
+#define EVQ_SIZE_WIDTH 3
+#define EVQ_SIZE_32K 6
+#define EVQ_SIZE_16K 5
+#define EVQ_SIZE_8K 4
+#define EVQ_SIZE_4K 3
+#define EVQ_SIZE_2K 2
+#define EVQ_SIZE_1K 1
+#define EVQ_SIZE_512 0
+#define EVQ_BUF_BASE_ID_LBN 0
+#define EVQ_BUF_BASE_ID_WIDTH 20
+
+/* Event queue read pointer */
+#define EVQ_RPTR_REG_KER_A1 0x11b00
+#define EVQ_RPTR_REG_KER_B0 0xfa0000
+/* NOTE(review): EVQ_RPTR_REG_KER (no revision suffix) is not defined in
+ * the part of this header shown here; unless it is defined elsewhere,
+ * any use of EVQ_RPTR_REG_KER_DWORD will fail to compile - confirm. */
+#define EVQ_RPTR_REG_KER_DWORD (EVQ_RPTR_REG_KER + 0)
+#define EVQ_RPTR_DWORD_LBN 0
+#define EVQ_RPTR_DWORD_WIDTH 14
+
+/* RSS indirection table */
+#define RX_RSS_INDIR_TBL_B0 0xFB0000
+#define RX_RSS_INDIR_ENT_B0_LBN 0
+#define RX_RSS_INDIR_ENT_B0_WIDTH 6
+
+/* Special buffer descriptors (full-mode) */
+#define BUF_FULL_TBL_KER_A1 0x8000
+#define BUF_FULL_TBL_KER_B0 0x800000
+#define IP_DAT_BUF_SIZE_LBN 50
+#define IP_DAT_BUF_SIZE_WIDTH 1
+#define IP_DAT_BUF_SIZE_8K 1
+#define IP_DAT_BUF_SIZE_4K 0
+#define BUF_ADR_REGION_LBN 48
+#define BUF_ADR_REGION_WIDTH 2
+#define BUF_ADR_FBUF_LBN 14
+#define BUF_ADR_FBUF_WIDTH 34
+#define BUF_OWNER_ID_FBUF_LBN 0
+#define BUF_OWNER_ID_FBUF_WIDTH 14
+
+/* Transmit descriptor */
+#define TX_KER_PORT_LBN 63
+#define TX_KER_PORT_WIDTH 1
+#define TX_KER_CONT_LBN 62
+#define TX_KER_CONT_WIDTH 1
+#define TX_KER_BYTE_CNT_LBN 48
+#define TX_KER_BYTE_CNT_WIDTH 14
+#define TX_KER_BUF_REGION_LBN 46
+#define TX_KER_BUF_REGION_WIDTH 2
+#define TX_KER_BUF_REGION0_DECODE 0
+#define TX_KER_BUF_REGION1_DECODE 1
+#define TX_KER_BUF_REGION2_DECODE 2
+#define TX_KER_BUF_REGION3_DECODE 3
+#define TX_KER_BUF_ADR_LBN 0
+#define TX_KER_BUF_ADR_WIDTH EFX_DMA_TYPE_WIDTH(46)
+
+/* Receive descriptor */
+#define RX_KER_BUF_SIZE_LBN 48
+#define RX_KER_BUF_SIZE_WIDTH 14
+#define RX_KER_BUF_REGION_LBN 46
+#define RX_KER_BUF_REGION_WIDTH 2
+#define RX_KER_BUF_REGION0_DECODE 0
+#define RX_KER_BUF_REGION1_DECODE 1
+#define RX_KER_BUF_REGION2_DECODE 2
+#define RX_KER_BUF_REGION3_DECODE 3
+#define RX_KER_BUF_ADR_LBN 0
+#define RX_KER_BUF_ADR_WIDTH EFX_DMA_TYPE_WIDTH(46)
+
+/**************************************************************************
+ *
+ * Falcon events
+ *
+ **************************************************************************
+ */
+
+/* Event queue entries */
+#define EV_CODE_LBN 60
+#define EV_CODE_WIDTH 4
+#define RX_IP_EV_DECODE 0
+#define TX_IP_EV_DECODE 2
+#define DRIVER_EV_DECODE 5
+#define GLOBAL_EV_DECODE 6
+#define DRV_GEN_EV_DECODE 7
+#define WHOLE_EVENT_LBN 0
+#define WHOLE_EVENT_WIDTH 64
+
+/* Receive events */
+#define RX_EV_PKT_OK_LBN 56
+#define RX_EV_PKT_OK_WIDTH 1
+#define RX_EV_PAUSE_FRM_ERR_LBN 55
+#define RX_EV_PAUSE_FRM_ERR_WIDTH 1
+#define RX_EV_BUF_OWNER_ID_ERR_LBN 54
+#define RX_EV_BUF_OWNER_ID_ERR_WIDTH 1
+#define RX_EV_IF_FRAG_ERR_LBN 53
+#define RX_EV_IF_FRAG_ERR_WIDTH 1
+#define RX_EV_IP_HDR_CHKSUM_ERR_LBN 52
+#define RX_EV_IP_HDR_CHKSUM_ERR_WIDTH 1
+#define RX_EV_TCP_UDP_CHKSUM_ERR_LBN 51
+#define RX_EV_TCP_UDP_CHKSUM_ERR_WIDTH 1
+#define RX_EV_ETH_CRC_ERR_LBN 50
+#define RX_EV_ETH_CRC_ERR_WIDTH 1
+#define RX_EV_FRM_TRUNC_LBN 49
+#define RX_EV_FRM_TRUNC_WIDTH 1
+#define RX_EV_DRIB_NIB_LBN 48
+#define RX_EV_DRIB_NIB_WIDTH 1
+#define RX_EV_TOBE_DISC_LBN 47
+#define RX_EV_TOBE_DISC_WIDTH 1
+#define RX_EV_PKT_TYPE_LBN 44
+#define RX_EV_PKT_TYPE_WIDTH 3
+#define RX_EV_PKT_TYPE_ETH_DECODE 0
+#define RX_EV_PKT_TYPE_LLC_DECODE 1
+#define RX_EV_PKT_TYPE_JUMBO_DECODE 2
+#define RX_EV_PKT_TYPE_VLAN_DECODE 3
+#define RX_EV_PKT_TYPE_VLAN_LLC_DECODE 4
+#define RX_EV_PKT_TYPE_VLAN_JUMBO_DECODE 5
+#define RX_EV_HDR_TYPE_LBN 42
+#define RX_EV_HDR_TYPE_WIDTH 2
+#define RX_EV_HDR_TYPE_TCP_IPV4_DECODE 0
+#define RX_EV_HDR_TYPE_UDP_IPV4_DECODE 1
+#define RX_EV_HDR_TYPE_OTHER_IP_DECODE 2
+#define RX_EV_HDR_TYPE_NON_IP_DECODE 3
+#define RX_EV_HDR_TYPE_HAS_CHECKSUMS(hdr_type) \
+ ((hdr_type) <= RX_EV_HDR_TYPE_UDP_IPV4_DECODE)
+#define RX_EV_MCAST_HASH_MATCH_LBN 40
+#define RX_EV_MCAST_HASH_MATCH_WIDTH 1
+#define RX_EV_MCAST_PKT_LBN 39
+#define RX_EV_MCAST_PKT_WIDTH 1
+#define RX_EV_Q_LABEL_LBN 32
+#define RX_EV_Q_LABEL_WIDTH 5
+#define RX_EV_JUMBO_CONT_LBN 31
+#define RX_EV_JUMBO_CONT_WIDTH 1
+#define RX_EV_BYTE_CNT_LBN 16
+#define RX_EV_BYTE_CNT_WIDTH 14
+#define RX_EV_SOP_LBN 15
+#define RX_EV_SOP_WIDTH 1
+#define RX_EV_DESC_PTR_LBN 0
+#define RX_EV_DESC_PTR_WIDTH 12
+
+/* Transmit events */
+#define TX_EV_PKT_ERR_LBN 38
+#define TX_EV_PKT_ERR_WIDTH 1
+#define TX_EV_Q_LABEL_LBN 32
+#define TX_EV_Q_LABEL_WIDTH 5
+#define TX_EV_WQ_FF_FULL_LBN 15
+#define TX_EV_WQ_FF_FULL_WIDTH 1
+#define TX_EV_COMP_LBN 12
+#define TX_EV_COMP_WIDTH 1
+#define TX_EV_DESC_PTR_LBN 0
+#define TX_EV_DESC_PTR_WIDTH 12
+
+/* Driver events */
+#define DRIVER_EV_SUB_CODE_LBN 56
+#define DRIVER_EV_SUB_CODE_WIDTH 4
+#define DRIVER_EV_SUB_DATA_LBN 0
+#define DRIVER_EV_SUB_DATA_WIDTH 14
+#define TX_DESCQ_FLS_DONE_EV_DECODE 0
+#define RX_DESCQ_FLS_DONE_EV_DECODE 1
+#define EVQ_INIT_DONE_EV_DECODE 2
+#define EVQ_NOT_EN_EV_DECODE 3
+#define RX_DESCQ_FLSFF_OVFL_EV_DECODE 4
+#define SRM_UPD_DONE_EV_DECODE 5
+#define WAKE_UP_EV_DECODE 6
+#define TX_PKT_NON_TCP_UDP_DECODE 9
+#define TIMER_EV_DECODE 10
+#define RX_RECOVERY_EV_DECODE 11
+#define RX_DSC_ERROR_EV_DECODE 14
+#define TX_DSC_ERROR_EV_DECODE 15
+#define DRIVER_EV_TX_DESCQ_ID_LBN 0
+#define DRIVER_EV_TX_DESCQ_ID_WIDTH 12
+#define DRIVER_EV_RX_FLUSH_FAIL_LBN 12
+#define DRIVER_EV_RX_FLUSH_FAIL_WIDTH 1
+#define DRIVER_EV_RX_DESCQ_ID_LBN 0
+#define DRIVER_EV_RX_DESCQ_ID_WIDTH 12
+#define SRM_CLR_EV_DECODE 0
+#define SRM_UPD_EV_DECODE 1
+#define SRM_ILLCLR_EV_DECODE 2
+
+/* Global events */
+#define RX_RECOVERY_B0_LBN 12
+#define RX_RECOVERY_B0_WIDTH 1
+#define XG_MNT_INTR_B0_LBN 11
+#define XG_MNT_INTR_B0_WIDTH 1
+#define RX_RECOVERY_A1_LBN 11
+#define RX_RECOVERY_A1_WIDTH 1
+#define XG_PHY_INTR_LBN 9
+#define XG_PHY_INTR_WIDTH 1
+#define G_PHY1_INTR_LBN 8
+#define G_PHY1_INTR_WIDTH 1
+#define G_PHY0_INTR_LBN 7
+#define G_PHY0_INTR_WIDTH 1
+
+/* Driver-generated test events */
+#define EVQ_MAGIC_LBN 0
+#define EVQ_MAGIC_WIDTH 32
+
+/**************************************************************************
+ *
+ * Falcon MAC stats
+ *
+ **************************************************************************
+ *
+ */
+#define GRxGoodOct_offset 0x0
+#define GRxBadOct_offset 0x8
+#define GRxMissPkt_offset 0x10
+#define GRxFalseCRS_offset 0x14
+#define GRxPausePkt_offset 0x18
+#define GRxBadPkt_offset 0x1C
+#define GRxUcastPkt_offset 0x20
+#define GRxMcastPkt_offset 0x24
+#define GRxBcastPkt_offset 0x28
+#define GRxGoodLt64Pkt_offset 0x2C
+#define GRxBadLt64Pkt_offset 0x30
+#define GRx64Pkt_offset 0x34
+#define GRx65to127Pkt_offset 0x38
+#define GRx128to255Pkt_offset 0x3C
+#define GRx256to511Pkt_offset 0x40
+#define GRx512to1023Pkt_offset 0x44
+#define GRx1024to15xxPkt_offset 0x48
+#define GRx15xxtoJumboPkt_offset 0x4C
+#define GRxGtJumboPkt_offset 0x50
+#define GRxFcsErr64to15xxPkt_offset 0x54
+#define GRxFcsErr15xxtoJumboPkt_offset 0x58
+#define GRxFcsErrGtJumboPkt_offset 0x5C
+#define GTxGoodBadOct_offset 0x80
+#define GTxGoodOct_offset 0x88
+#define GTxSglColPkt_offset 0x90
+#define GTxMultColPkt_offset 0x94
+#define GTxExColPkt_offset 0x98
+#define GTxDefPkt_offset 0x9C
+#define GTxLateCol_offset 0xA0
+#define GTxExDefPkt_offset 0xA4
+#define GTxPausePkt_offset 0xA8
+#define GTxBadPkt_offset 0xAC
+#define GTxUcastPkt_offset 0xB0
+#define GTxMcastPkt_offset 0xB4
+#define GTxBcastPkt_offset 0xB8
+#define GTxLt64Pkt_offset 0xBC
+#define GTx64Pkt_offset 0xC0
+#define GTx65to127Pkt_offset 0xC4
+#define GTx128to255Pkt_offset 0xC8
+#define GTx256to511Pkt_offset 0xCC
+#define GTx512to1023Pkt_offset 0xD0
+#define GTx1024to15xxPkt_offset 0xD4
+#define GTx15xxtoJumboPkt_offset 0xD8
+#define GTxGtJumboPkt_offset 0xDC
+#define GTxNonTcpUdpPkt_offset 0xE0
+#define GTxMacSrcErrPkt_offset 0xE4
+#define GTxIpSrcErrPkt_offset 0xE8
+#define GDmaDone_offset 0xEC
+
+#define XgRxOctets_offset 0x0
+#define XgRxOctets_WIDTH 48
+#define XgRxOctetsOK_offset 0x8
+#define XgRxOctetsOK_WIDTH 48
+#define XgRxPkts_offset 0x10
+#define XgRxPkts_WIDTH 32
+#define XgRxPktsOK_offset 0x14
+#define XgRxPktsOK_WIDTH 32
+#define XgRxBroadcastPkts_offset 0x18
+#define XgRxBroadcastPkts_WIDTH 32
+#define XgRxMulticastPkts_offset 0x1C
+#define XgRxMulticastPkts_WIDTH 32
+#define XgRxUnicastPkts_offset 0x20
+#define XgRxUnicastPkts_WIDTH 32
+#define XgRxUndersizePkts_offset 0x24
+#define XgRxUndersizePkts_WIDTH 32
+#define XgRxOversizePkts_offset 0x28
+#define XgRxOversizePkts_WIDTH 32
+#define XgRxJabberPkts_offset 0x2C
+#define XgRxJabberPkts_WIDTH 32
+#define XgRxUndersizeFCSerrorPkts_offset 0x30
+#define XgRxUndersizeFCSerrorPkts_WIDTH 32
+#define XgRxDropEvents_offset 0x34
+#define XgRxDropEvents_WIDTH 32
+#define XgRxFCSerrorPkts_offset 0x38
+#define XgRxFCSerrorPkts_WIDTH 32
+#define XgRxAlignError_offset 0x3C
+#define XgRxAlignError_WIDTH 32
+#define XgRxSymbolError_offset 0x40
+#define XgRxSymbolError_WIDTH 32
+#define XgRxInternalMACError_offset 0x44
+#define XgRxInternalMACError_WIDTH 32
+#define XgRxControlPkts_offset 0x48
+#define XgRxControlPkts_WIDTH 32
+#define XgRxPausePkts_offset 0x4C
+#define XgRxPausePkts_WIDTH 32
+#define XgRxPkts64Octets_offset 0x50
+#define XgRxPkts64Octets_WIDTH 32
+#define XgRxPkts65to127Octets_offset 0x54
+#define XgRxPkts65to127Octets_WIDTH 32
+#define XgRxPkts128to255Octets_offset 0x58
+#define XgRxPkts128to255Octets_WIDTH 32
+#define XgRxPkts256to511Octets_offset 0x5C
+#define XgRxPkts256to511Octets_WIDTH 32
+#define XgRxPkts512to1023Octets_offset 0x60
+#define XgRxPkts512to1023Octets_WIDTH 32
+#define XgRxPkts1024to15xxOctets_offset 0x64
+#define XgRxPkts1024to15xxOctets_WIDTH 32
+#define XgRxPkts15xxtoMaxOctets_offset 0x68
+#define XgRxPkts15xxtoMaxOctets_WIDTH 32
+#define XgRxLengthError_offset 0x6C
+#define XgRxLengthError_WIDTH 32
+#define XgTxPkts_offset 0x80
+#define XgTxPkts_WIDTH 32
+#define XgTxOctets_offset 0x88
+#define XgTxOctets_WIDTH 48
+#define XgTxMulticastPkts_offset 0x90
+#define XgTxMulticastPkts_WIDTH 32
+#define XgTxBroadcastPkts_offset 0x94
+#define XgTxBroadcastPkts_WIDTH 32
+#define XgTxUnicastPkts_offset 0x98
+#define XgTxUnicastPkts_WIDTH 32
+#define XgTxControlPkts_offset 0x9C
+#define XgTxControlPkts_WIDTH 32
+#define XgTxPausePkts_offset 0xA0
+#define XgTxPausePkts_WIDTH 32
+#define XgTxPkts64Octets_offset 0xA4
+#define XgTxPkts64Octets_WIDTH 32
+#define XgTxPkts65to127Octets_offset 0xA8
+#define XgTxPkts65to127Octets_WIDTH 32
+#define XgTxPkts128to255Octets_offset 0xAC
+#define XgTxPkts128to255Octets_WIDTH 32
+#define XgTxPkts256to511Octets_offset 0xB0
+#define XgTxPkts256to511Octets_WIDTH 32
+#define XgTxPkts512to1023Octets_offset 0xB4
+#define XgTxPkts512to1023Octets_WIDTH 32
+#define XgTxPkts1024to15xxOctets_offset 0xB8
+#define XgTxPkts1024to15xxOctets_WIDTH 32
+#define XgTxPkts1519toMaxOctets_offset 0xBC
+#define XgTxPkts1519toMaxOctets_WIDTH 32
+#define XgTxUndersizePkts_offset 0xC0
+#define XgTxUndersizePkts_WIDTH 32
+#define XgTxOversizePkts_offset 0xC4
+#define XgTxOversizePkts_WIDTH 32
+#define XgTxNonTcpUdpPkt_offset 0xC8
+#define XgTxNonTcpUdpPkt_WIDTH 16
+#define XgTxMacSrcErrPkt_offset 0xCC
+#define XgTxMacSrcErrPkt_WIDTH 16
+#define XgTxIpSrcErrPkt_offset 0xD0
+#define XgTxIpSrcErrPkt_WIDTH 16
+#define XgDmaDone_offset 0xD4
+
+#define FALCON_STATS_NOT_DONE 0x00000000
+#define FALCON_STATS_DONE 0xffffffff
+
+/* Interrupt status register bits */
+#define FATAL_INT_LBN 64
+#define FATAL_INT_WIDTH 1
+#define INT_EVQS_LBN 40
+#define INT_EVQS_WIDTH 4
+
+/**************************************************************************
+ *
+ * Falcon non-volatile configuration
+ *
+ **************************************************************************
+ */
+
+/* Board configuration v2 (v1 is obsolete; later versions are compatible) */
+struct falcon_nvconfig_board_v2 {
+ __le16 nports;
+ u8 port0_phy_addr;
+ u8 port0_phy_type;
+ u8 port1_phy_addr;
+ u8 port1_phy_type;
+ __le16 asic_sub_revision;
+ __le16 board_revision;
+} __attribute__ ((packed));
+
+#define NVCONFIG_BASE 0x300
+#define NVCONFIG_BOARD_MAGIC_NUM 0xFA1C
+struct falcon_nvconfig {
+ efx_oword_t ee_vpd_cfg_reg; /* 0x300 */
+ u8 mac_address[2][8]; /* 0x310 */
+ efx_oword_t pcie_sd_ctl0123_reg; /* 0x320 */
+ efx_oword_t pcie_sd_ctl45_reg; /* 0x330 */
+ efx_oword_t pcie_pcs_ctl_stat_reg; /* 0x340 */
+ efx_oword_t hw_init_reg; /* 0x350 */
+ efx_oword_t nic_stat_reg; /* 0x360 */
+ efx_oword_t glb_ctl_reg; /* 0x370 */
+ efx_oword_t srm_cfg_reg; /* 0x380 */
+ efx_oword_t spare_reg; /* 0x390 */
+ __le16 board_magic_num; /* 0x3A0 */
+ __le16 board_struct_ver;
+ __le16 board_checksum;
+ struct falcon_nvconfig_board_v2 board_v2;
+} __attribute__ ((packed));
+
+#endif /* EFX_FALCON_HWDEFS_H */
diff --git a/drivers/net/sfc/falcon_io.h b/drivers/net/sfc/falcon_io.h
new file mode 100644
index 00000000000..ea08184ddfa
--- /dev/null
+++ b/drivers/net/sfc/falcon_io.h
@@ -0,0 +1,243 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2008 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_FALCON_IO_H
+#define EFX_FALCON_IO_H
+
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include "net_driver.h"
+
+/**************************************************************************
+ *
+ * Falcon hardware access
+ *
+ **************************************************************************
+ *
+ * Notes on locking strategy:
+ *
+ * Most Falcon registers require 16-byte (or 8-byte, for SRAM
+ * registers) atomic writes which necessitates locking.
+ * Under normal operation few writes to the Falcon BAR are made and these
+ * registers (EVQ_RPTR_REG, RX_DESC_UPD_REG and TX_DESC_UPD_REG) are special
+ * cased to allow 4-byte (hence lockless) accesses.
+ *
+ * It *is* safe to write to these 4-byte registers in the middle of an
+ * access to an 8-byte or 16-byte register. We therefore use a
+ * spinlock to protect accesses to the larger registers, but no locks
+ * for the 4-byte registers.
+ *
+ * A write barrier is needed to ensure that DW3 is written after DW0/1/2
+ * due to the way the 16-byte registers are "collected" in the Falcon BIU.
+ *
+ * We also lock when carrying out reads, to ensure consistency of the
+ * data (made possible since the BIU reads all 128 bits into a cache).
+ * Reads are very rare, so this isn't a significant performance
+ * impact. (Most data transferred from NIC to host is DMAed directly
+ * into host memory).
+ *
+ * I/O BAR access uses locks for both reads and writes (but is only provided
+ * for testing purposes).
+ */
+
+/* Special buffer descriptors (Falcon SRAM) */
+#define BUF_TBL_KER_A1 0x18000
+#define BUF_TBL_KER_B0 0x800000
+
+
+#if BITS_PER_LONG == 64
+#define FALCON_USE_QWORD_IO 1
+#endif
+
+#define _falcon_writeq(efx, value, reg) \
+ __raw_writeq((__force u64) (value), (efx)->membase + (reg))
+#define _falcon_writel(efx, value, reg) \
+ __raw_writel((__force u32) (value), (efx)->membase + (reg))
+#define _falcon_readq(efx, reg) \
+ ((__force __le64) __raw_readq((efx)->membase + (reg)))
+#define _falcon_readl(efx, reg) \
+ ((__force __le32) __raw_readl((efx)->membase + (reg)))
+
+/* Writes to a normal 16-byte Falcon register, locking as appropriate. */
+static inline void falcon_write(struct efx_nic *efx, efx_oword_t *value,
+ unsigned int reg)
+{
+ unsigned long flags;
+
+ EFX_REGDUMP(efx, "writing register %x with " EFX_OWORD_FMT "\n", reg,
+ EFX_OWORD_VAL(*value));
+
+ spin_lock_irqsave(&efx->biu_lock, flags);
+#ifdef FALCON_USE_QWORD_IO
+ _falcon_writeq(efx, value->u64[0], reg + 0);
+ wmb();
+ _falcon_writeq(efx, value->u64[1], reg + 8);
+#else
+ _falcon_writel(efx, value->u32[0], reg + 0);
+ _falcon_writel(efx, value->u32[1], reg + 4);
+ _falcon_writel(efx, value->u32[2], reg + 8);
+ wmb();
+ _falcon_writel(efx, value->u32[3], reg + 12);
+#endif
+ mmiowb();
+ spin_unlock_irqrestore(&efx->biu_lock, flags);
+}
+
+/* Writes to an 8-byte Falcon SRAM register, locking as appropriate. */
+static inline void falcon_write_sram(struct efx_nic *efx, efx_qword_t *value,
+ unsigned int index)
+{
+ unsigned int reg = efx->type->buf_tbl_base + (index * sizeof(*value));
+ unsigned long flags;
+
+ EFX_REGDUMP(efx, "writing SRAM register %x with " EFX_QWORD_FMT "\n",
+ reg, EFX_QWORD_VAL(*value));
+
+ spin_lock_irqsave(&efx->biu_lock, flags);
+#ifdef FALCON_USE_QWORD_IO
+ _falcon_writeq(efx, value->u64[0], reg + 0);
+#else
+ _falcon_writel(efx, value->u32[0], reg + 0);
+ wmb();
+ _falcon_writel(efx, value->u32[1], reg + 4);
+#endif
+ mmiowb();
+ spin_unlock_irqrestore(&efx->biu_lock, flags);
+}
+
+/* Write dword to Falcon register that allows partial writes
+ *
+ * Some Falcon registers (EVQ_RPTR_REG, RX_DESC_UPD_REG and
+ * TX_DESC_UPD_REG) can be written to as a single dword. This allows
+ * for lockless writes.
+ */
+static inline void falcon_writel(struct efx_nic *efx, efx_dword_t *value,
+ unsigned int reg)
+{
+ EFX_REGDUMP(efx, "writing partial register %x with "EFX_DWORD_FMT"\n",
+ reg, EFX_DWORD_VAL(*value));
+
+ /* No lock required */
+ _falcon_writel(efx, value->u32[0], reg);
+}
+
+/* Read from a Falcon register
+ *
+ * This reads an entire 16-byte Falcon register in one go, locking as
+ * appropriate. It is essential to read the first dword first, as this
+ * prompts Falcon to load the current value into the shadow register.
+ */
+static inline void falcon_read(struct efx_nic *efx, efx_oword_t *value,
+ unsigned int reg)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&efx->biu_lock, flags);
+ value->u32[0] = _falcon_readl(efx, reg + 0);
+ rmb();
+ value->u32[1] = _falcon_readl(efx, reg + 4);
+ value->u32[2] = _falcon_readl(efx, reg + 8);
+ value->u32[3] = _falcon_readl(efx, reg + 12);
+ spin_unlock_irqrestore(&efx->biu_lock, flags);
+
+ EFX_REGDUMP(efx, "read from register %x, got " EFX_OWORD_FMT "\n", reg,
+ EFX_OWORD_VAL(*value));
+}
+
+/* This reads an 8-byte Falcon SRAM entry in one go. */
+static inline void falcon_read_sram(struct efx_nic *efx, efx_qword_t *value,
+ unsigned int index)
+{
+ unsigned int reg = efx->type->buf_tbl_base + (index * sizeof(*value));
+ unsigned long flags;
+
+ spin_lock_irqsave(&efx->biu_lock, flags);
+#ifdef FALCON_USE_QWORD_IO
+ value->u64[0] = _falcon_readq(efx, reg + 0);
+#else
+ value->u32[0] = _falcon_readl(efx, reg + 0);
+ rmb();
+ value->u32[1] = _falcon_readl(efx, reg + 4);
+#endif
+ spin_unlock_irqrestore(&efx->biu_lock, flags);
+
+ EFX_REGDUMP(efx, "read from SRAM register %x, got "EFX_QWORD_FMT"\n",
+ reg, EFX_QWORD_VAL(*value));
+}
+
+/* Read dword from Falcon register that allows partial writes (sic) */
+static inline void falcon_readl(struct efx_nic *efx, efx_dword_t *value,
+ unsigned int reg)
+{
+ value->u32[0] = _falcon_readl(efx, reg);
+ EFX_REGDUMP(efx, "read from register %x, got "EFX_DWORD_FMT"\n",
+ reg, EFX_DWORD_VAL(*value));
+}
+
+/* Write to a register forming part of a table */
+static inline void falcon_write_table(struct efx_nic *efx, efx_oword_t *value,
+ unsigned int reg, unsigned int index)
+{
+ falcon_write(efx, value, reg + index * sizeof(efx_oword_t));
+}
+
+/* Read from a register forming part of a table */
+static inline void falcon_read_table(struct efx_nic *efx, efx_oword_t *value,
+ unsigned int reg, unsigned int index)
+{
+ falcon_read(efx, value, reg + index * sizeof(efx_oword_t));
+}
+
+/* Write to a dword register forming part of a table */
+static inline void falcon_writel_table(struct efx_nic *efx, efx_dword_t *value,
+ unsigned int reg, unsigned int index)
+{
+ falcon_writel(efx, value, reg + index * sizeof(efx_oword_t));
+}
+
+/* Page-mapped register block size */
+#define FALCON_PAGE_BLOCK_SIZE 0x2000
+
+/* Calculate offset to page-mapped register block */
+#define FALCON_PAGED_REG(page, reg) \
+ ((page) * FALCON_PAGE_BLOCK_SIZE + (reg))
+
+/* As for falcon_write(), but for a page-mapped register. */
+static inline void falcon_write_page(struct efx_nic *efx, efx_oword_t *value,
+ unsigned int reg, unsigned int page)
+{
+ falcon_write(efx, value, FALCON_PAGED_REG(page, reg));
+}
+
+/* As for falcon_writel(), but for a page-mapped register. */
+static inline void falcon_writel_page(struct efx_nic *efx, efx_dword_t *value,
+ unsigned int reg, unsigned int page)
+{
+ falcon_writel(efx, value, FALCON_PAGED_REG(page, reg));
+}
+
+/* Write dword to Falcon page-mapped register with an extra lock.
+ *
+ * As for falcon_writel_page(), but for a register that suffers from
+ * SFC bug 3181. Take out a lock so the BIU collector cannot be
+ * confused. */
+static inline void falcon_writel_page_locked(struct efx_nic *efx,
+ efx_dword_t *value,
+ unsigned int reg,
+ unsigned int page)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&efx->biu_lock, flags);
+ falcon_writel(efx, value, FALCON_PAGED_REG(page, reg));
+ spin_unlock_irqrestore(&efx->biu_lock, flags);
+}
+
+#endif /* EFX_FALCON_IO_H */
diff --git a/drivers/net/sfc/falcon_xmac.c b/drivers/net/sfc/falcon_xmac.c
new file mode 100644
index 00000000000..aa7521b24a5
--- /dev/null
+++ b/drivers/net/sfc/falcon_xmac.c
@@ -0,0 +1,585 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2008 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/delay.h>
+#include "net_driver.h"
+#include "efx.h"
+#include "falcon.h"
+#include "falcon_hwdefs.h"
+#include "falcon_io.h"
+#include "mac.h"
+#include "gmii.h"
+#include "mdio_10g.h"
+#include "phy.h"
+#include "boards.h"
+#include "workarounds.h"
+
+/**************************************************************************
+ *
+ * MAC register access
+ *
+ **************************************************************************/
+
+/* Offset of an XMAC register within Falcon */
+#define FALCON_XMAC_REG(mac_reg) \
+ (FALCON_XMAC_REGBANK + ((mac_reg) * FALCON_XMAC_REG_SIZE))
+
+void falcon_xmac_writel(struct efx_nic *efx,
+ efx_dword_t *value, unsigned int mac_reg)
+{
+ efx_oword_t temp;
+
+ EFX_POPULATE_OWORD_1(temp, MAC_DATA, EFX_DWORD_FIELD(*value, MAC_DATA));
+ falcon_write(efx, &temp, FALCON_XMAC_REG(mac_reg));
+}
+
+void falcon_xmac_readl(struct efx_nic *efx,
+ efx_dword_t *value, unsigned int mac_reg)
+{
+ efx_oword_t temp;
+
+ falcon_read(efx, &temp, FALCON_XMAC_REG(mac_reg));
+ EFX_POPULATE_DWORD_1(*value, MAC_DATA, EFX_OWORD_FIELD(temp, MAC_DATA));
+}
+
+/**************************************************************************
+ *
+ * MAC operations
+ *
+ *************************************************************************/
+static int falcon_reset_xmac(struct efx_nic *efx)
+{
+ efx_dword_t reg;
+ int count;
+
+ EFX_POPULATE_DWORD_1(reg, XM_CORE_RST, 1);
+ falcon_xmac_writel(efx, &reg, XM_GLB_CFG_REG_MAC);
+
+ for (count = 0; count < 10000; count++) { /* wait up to 100ms */
+ falcon_xmac_readl(efx, &reg, XM_GLB_CFG_REG_MAC);
+ if (EFX_DWORD_FIELD(reg, XM_CORE_RST) == 0)
+ return 0;
+ udelay(10);
+ }
+
+ EFX_ERR(efx, "timed out waiting for XMAC core reset\n");
+ return -ETIMEDOUT;
+}
+
+/* Configure the XAUI driver that is an output from Falcon */
+static void falcon_setup_xaui(struct efx_nic *efx)
+{
+ efx_dword_t sdctl, txdrv;
+
+ /* Move the XAUI into low power, unless there is no PHY, in
+ * which case the XAUI will have to drive a cable. */
+ if (efx->phy_type == PHY_TYPE_NONE)
+ return;
+
+ falcon_xmac_readl(efx, &sdctl, XX_SD_CTL_REG_MAC);
+ EFX_SET_DWORD_FIELD(sdctl, XX_HIDRVD, XX_SD_CTL_DRV_DEFAULT);
+ EFX_SET_DWORD_FIELD(sdctl, XX_LODRVD, XX_SD_CTL_DRV_DEFAULT);
+ EFX_SET_DWORD_FIELD(sdctl, XX_HIDRVC, XX_SD_CTL_DRV_DEFAULT);
+ EFX_SET_DWORD_FIELD(sdctl, XX_LODRVC, XX_SD_CTL_DRV_DEFAULT);
+ EFX_SET_DWORD_FIELD(sdctl, XX_HIDRVB, XX_SD_CTL_DRV_DEFAULT);
+ EFX_SET_DWORD_FIELD(sdctl, XX_LODRVB, XX_SD_CTL_DRV_DEFAULT);
+ EFX_SET_DWORD_FIELD(sdctl, XX_HIDRVA, XX_SD_CTL_DRV_DEFAULT);
+ EFX_SET_DWORD_FIELD(sdctl, XX_LODRVA, XX_SD_CTL_DRV_DEFAULT);
+ falcon_xmac_writel(efx, &sdctl, XX_SD_CTL_REG_MAC);
+
+ EFX_POPULATE_DWORD_8(txdrv,
+ XX_DEQD, XX_TXDRV_DEQ_DEFAULT,
+ XX_DEQC, XX_TXDRV_DEQ_DEFAULT,
+ XX_DEQB, XX_TXDRV_DEQ_DEFAULT,
+ XX_DEQA, XX_TXDRV_DEQ_DEFAULT,
+ XX_DTXD, XX_TXDRV_DTX_DEFAULT,
+ XX_DTXC, XX_TXDRV_DTX_DEFAULT,
+ XX_DTXB, XX_TXDRV_DTX_DEFAULT,
+ XX_DTXA, XX_TXDRV_DTX_DEFAULT);
+ falcon_xmac_writel(efx, &txdrv, XX_TXDRV_CTL_REG_MAC);
+}
+
+static void falcon_hold_xaui_in_rst(struct efx_nic *efx)
+{
+ efx_dword_t reg;
+
+ EFX_ZERO_DWORD(reg);
+ EFX_SET_DWORD_FIELD(reg, XX_PWRDNA_EN, 1);
+ EFX_SET_DWORD_FIELD(reg, XX_PWRDNB_EN, 1);
+ EFX_SET_DWORD_FIELD(reg, XX_PWRDNC_EN, 1);
+ EFX_SET_DWORD_FIELD(reg, XX_PWRDND_EN, 1);
+ EFX_SET_DWORD_FIELD(reg, XX_RSTPLLAB_EN, 1);
+ EFX_SET_DWORD_FIELD(reg, XX_RSTPLLCD_EN, 1);
+ EFX_SET_DWORD_FIELD(reg, XX_RESETA_EN, 1);
+ EFX_SET_DWORD_FIELD(reg, XX_RESETB_EN, 1);
+ EFX_SET_DWORD_FIELD(reg, XX_RESETC_EN, 1);
+ EFX_SET_DWORD_FIELD(reg, XX_RESETD_EN, 1);
+ EFX_SET_DWORD_FIELD(reg, XX_RSTXGXSRX_EN, 1);
+ EFX_SET_DWORD_FIELD(reg, XX_RSTXGXSTX_EN, 1);
+ falcon_xmac_writel(efx, &reg, XX_PWR_RST_REG_MAC);
+ udelay(10);
+}
+
+static int _falcon_reset_xaui_a(struct efx_nic *efx)
+{
+ efx_dword_t reg;
+
+ falcon_hold_xaui_in_rst(efx);
+ falcon_xmac_readl(efx, &reg, XX_PWR_RST_REG_MAC);
+
+ /* Follow the RAMBUS XAUI data reset sequencing
+ * Channels A and B first: power down, reset PLL, reset, clear
+ */
+ EFX_SET_DWORD_FIELD(reg, XX_PWRDNA_EN, 0);
+ EFX_SET_DWORD_FIELD(reg, XX_PWRDNB_EN, 0);
+ falcon_xmac_writel(efx, &reg, XX_PWR_RST_REG_MAC);
+ udelay(10);
+
+ EFX_SET_DWORD_FIELD(reg, XX_RSTPLLAB_EN, 0);
+ falcon_xmac_writel(efx, &reg, XX_PWR_RST_REG_MAC);
+ udelay(10);
+
+ EFX_SET_DWORD_FIELD(reg, XX_RESETA_EN, 0);
+ EFX_SET_DWORD_FIELD(reg, XX_RESETB_EN, 0);
+ falcon_xmac_writel(efx, &reg, XX_PWR_RST_REG_MAC);
+ udelay(10);
+
+ /* Channels C and D: power down, reset PLL, reset, clear */
+ EFX_SET_DWORD_FIELD(reg, XX_PWRDNC_EN, 0);
+ EFX_SET_DWORD_FIELD(reg, XX_PWRDND_EN, 0);
+ falcon_xmac_writel(efx, &reg, XX_PWR_RST_REG_MAC);
+ udelay(10);
+
+ EFX_SET_DWORD_FIELD(reg, XX_RSTPLLCD_EN, 0);
+ falcon_xmac_writel(efx, &reg, XX_PWR_RST_REG_MAC);
+ udelay(10);
+
+ EFX_SET_DWORD_FIELD(reg, XX_RESETC_EN, 0);
+ EFX_SET_DWORD_FIELD(reg, XX_RESETD_EN, 0);
+ falcon_xmac_writel(efx, &reg, XX_PWR_RST_REG_MAC);
+ udelay(10);
+
+ /* Setup XAUI */
+ falcon_setup_xaui(efx);
+ udelay(10);
+
+ /* Take XGXS out of reset */
+ EFX_ZERO_DWORD(reg);
+ falcon_xmac_writel(efx, &reg, XX_PWR_RST_REG_MAC);
+ udelay(10);
+
+ return 0;
+}
+
+static int _falcon_reset_xaui_b(struct efx_nic *efx)
+{
+ efx_dword_t reg;
+ int count;
+
+ EFX_POPULATE_DWORD_1(reg, XX_RST_XX_EN, 1);
+ falcon_xmac_writel(efx, &reg, XX_PWR_RST_REG_MAC);
+
+ /* Give some time for the link to establish */
+ for (count = 0; count < 1000; count++) { /* wait up to 10ms */
+ falcon_xmac_readl(efx, &reg, XX_PWR_RST_REG_MAC);
+ if (EFX_DWORD_FIELD(reg, XX_RST_XX_EN) == 0) {
+ falcon_setup_xaui(efx);
+ return 0;
+ }
+ udelay(10);
+ }
+ EFX_ERR(efx, "timed out waiting for XAUI/XGXS reset\n");
+ return -ETIMEDOUT;
+}
+
+int falcon_reset_xaui(struct efx_nic *efx)
+{
+ int rc;
+
+ if (EFX_WORKAROUND_9388(efx)) {
+ falcon_hold_xaui_in_rst(efx);
+ efx->phy_op->reset_xaui(efx);
+ rc = _falcon_reset_xaui_a(efx);
+ } else {
+ rc = _falcon_reset_xaui_b(efx);
+ }
+ return rc;
+}
+
+static int falcon_xgmii_status(struct efx_nic *efx)
+{
+ efx_dword_t reg;
+
+ if (FALCON_REV(efx) < FALCON_REV_B0)
+ return 1;
+
+ /* The ISR latches, so clear it and re-read */
+ falcon_xmac_readl(efx, &reg, XM_MGT_INT_REG_MAC_B0);
+ falcon_xmac_readl(efx, &reg, XM_MGT_INT_REG_MAC_B0);
+
+ if (EFX_DWORD_FIELD(reg, XM_LCLFLT) ||
+ EFX_DWORD_FIELD(reg, XM_RMTFLT)) {
+ EFX_INFO(efx, "MGT_INT: "EFX_DWORD_FMT"\n", EFX_DWORD_VAL(reg));
+ return 0;
+ }
+
+ return 1;
+}
+
+static void falcon_mask_status_intr(struct efx_nic *efx, int enable)
+{
+ efx_dword_t reg;
+
+ if (FALCON_REV(efx) < FALCON_REV_B0)
+ return;
+
+ /* Flush the ISR */
+ if (enable)
+ falcon_xmac_readl(efx, &reg, XM_MGT_INT_REG_MAC_B0);
+
+ EFX_POPULATE_DWORD_2(reg,
+ XM_MSK_RMTFLT, !enable,
+ XM_MSK_LCLFLT, !enable);
+ falcon_xmac_writel(efx, &reg, XM_MGT_INT_MSK_REG_MAC_B0);
+}
+
+int falcon_init_xmac(struct efx_nic *efx)
+{
+ int rc;
+
+ /* Initialize the PHY first so the clock is around */
+ rc = efx->phy_op->init(efx);
+ if (rc)
+ goto fail1;
+
+ rc = falcon_reset_xaui(efx);
+ if (rc)
+ goto fail2;
+
+ /* Wait again. Give the PHY and MAC time to come back */
+ schedule_timeout_uninterruptible(HZ / 10);
+
+ rc = falcon_reset_xmac(efx);
+ if (rc)
+ goto fail2;
+
+ falcon_mask_status_intr(efx, 1);
+ return 0;
+
+ fail2:
+ efx->phy_op->fini(efx);
+ fail1:
+ return rc;
+}
+
+int falcon_xaui_link_ok(struct efx_nic *efx)
+{
+ efx_dword_t reg;
+ int align_done, sync_status, link_ok = 0;
+
+ /* Read link status */
+ falcon_xmac_readl(efx, &reg, XX_CORE_STAT_REG_MAC);
+
+ align_done = EFX_DWORD_FIELD(reg, XX_ALIGN_DONE);
+ sync_status = EFX_DWORD_FIELD(reg, XX_SYNC_STAT);
+ if (align_done && (sync_status == XX_SYNC_STAT_DECODE_SYNCED))
+ link_ok = 1;
+
+ /* Clear link status ready for next read */
+ EFX_SET_DWORD_FIELD(reg, XX_COMMA_DET, XX_COMMA_DET_RESET);
+ EFX_SET_DWORD_FIELD(reg, XX_CHARERR, XX_CHARERR_RESET);
+ EFX_SET_DWORD_FIELD(reg, XX_DISPERR, XX_DISPERR_RESET);
+ falcon_xmac_writel(efx, &reg, XX_CORE_STAT_REG_MAC);
+
+ /* If the link is up, then check the PHY side of the XAUI link
+ * (error conditions from the wire side propagate back through
+ * the PHY to the XAUI side). */
+ if (efx->link_up && link_ok) {
+ int has_phyxs = efx->phy_op->mmds & (1 << MDIO_MMD_PHYXS);
+ if (has_phyxs)
+ link_ok = mdio_clause45_phyxgxs_lane_sync(efx);
+ }
+
+ /* If the PHY and XAUI links are up, then check the MAC's XGMII
+ * fault state */
+ if (efx->link_up && link_ok)
+ link_ok = falcon_xgmii_status(efx);
+
+ return link_ok;
+}
+
+static void falcon_reconfigure_xmac_core(struct efx_nic *efx)
+{
+ unsigned int max_frame_len;
+ efx_dword_t reg;
+ int rx_fc = (efx->flow_control & EFX_FC_RX) ? 1 : 0;
+
+ /* Configure MAC - cut-thru mode is hard wired on */
+ EFX_POPULATE_DWORD_3(reg,
+ XM_RX_JUMBO_MODE, 1,
+ XM_TX_STAT_EN, 1,
+ XM_RX_STAT_EN, 1);
+ falcon_xmac_writel(efx, &reg, XM_GLB_CFG_REG_MAC);
+
+ /* Configure TX */
+ EFX_POPULATE_DWORD_6(reg,
+ XM_TXEN, 1,
+ XM_TX_PRMBL, 1,
+ XM_AUTO_PAD, 1,
+ XM_TXCRC, 1,
+ XM_FCNTL, 1,
+ XM_IPG, 0x3);
+ falcon_xmac_writel(efx, &reg, XM_TX_CFG_REG_MAC);
+
+ /* Configure RX */
+ EFX_POPULATE_DWORD_5(reg,
+ XM_RXEN, 1,
+ XM_AUTO_DEPAD, 0,
+ XM_ACPT_ALL_MCAST, 1,
+ XM_ACPT_ALL_UCAST, efx->promiscuous,
+ XM_PASS_CRC_ERR, 1);
+ falcon_xmac_writel(efx, &reg, XM_RX_CFG_REG_MAC);
+
+ /* Set frame length */
+ max_frame_len = EFX_MAX_FRAME_LEN(efx->net_dev->mtu);
+ EFX_POPULATE_DWORD_1(reg, XM_MAX_RX_FRM_SIZE, max_frame_len);
+ falcon_xmac_writel(efx, &reg, XM_RX_PARAM_REG_MAC);
+ EFX_POPULATE_DWORD_2(reg,
+ XM_MAX_TX_FRM_SIZE, max_frame_len,
+ XM_TX_JUMBO_MODE, 1);
+ falcon_xmac_writel(efx, &reg, XM_TX_PARAM_REG_MAC);
+
+ EFX_POPULATE_DWORD_2(reg,
+ XM_PAUSE_TIME, 0xfffe, /* MAX PAUSE TIME */
+ XM_DIS_FCNTL, rx_fc ? 0 : 1);
+ falcon_xmac_writel(efx, &reg, XM_FC_REG_MAC);
+
+ /* Set MAC address */
+ EFX_POPULATE_DWORD_4(reg,
+ XM_ADR_0, efx->net_dev->dev_addr[0],
+ XM_ADR_1, efx->net_dev->dev_addr[1],
+ XM_ADR_2, efx->net_dev->dev_addr[2],
+ XM_ADR_3, efx->net_dev->dev_addr[3]);
+ falcon_xmac_writel(efx, &reg, XM_ADR_LO_REG_MAC);
+ EFX_POPULATE_DWORD_2(reg,
+ XM_ADR_4, efx->net_dev->dev_addr[4],
+ XM_ADR_5, efx->net_dev->dev_addr[5]);
+ falcon_xmac_writel(efx, &reg, XM_ADR_HI_REG_MAC);
+}
+
+/* Check the Falcon side of the Falcon-PHY XAUI link. If it is down, reset
+ * the XAUI and retry, giving up once the tries are exhausted */
+static int falcon_check_xaui_link_up(struct efx_nic *efx)
+{
+ int max_tries, tries;
+ tries = EFX_WORKAROUND_5147(efx) ? 5 : 1;
+ max_tries = tries;
+
+ if (efx->phy_type == PHY_TYPE_NONE)
+ return 0;
+
+ while (tries) {
+ if (falcon_xaui_link_ok(efx))
+ return 1;
+
+ EFX_LOG(efx, "%s Clobbering XAUI (%d tries left).\n",
+ __func__, tries);
+ (void) falcon_reset_xaui(efx);
+ udelay(200);
+ tries--;
+ }
+
+ EFX_ERR(efx, "Failed to bring XAUI link back up in %d tries!\n",
+ max_tries);
+ return 0;
+}
+
+void falcon_reconfigure_xmac(struct efx_nic *efx)
+{
+ int xaui_link_ok;
+
+ falcon_mask_status_intr(efx, 0);
+
+ falcon_deconfigure_mac_wrapper(efx);
+ efx->phy_op->reconfigure(efx);
+ falcon_reconfigure_xmac_core(efx);
+ falcon_reconfigure_mac_wrapper(efx);
+
+ /* Ensure XAUI link is up */
+ xaui_link_ok = falcon_check_xaui_link_up(efx);
+
+ if (xaui_link_ok && efx->link_up)
+ falcon_mask_status_intr(efx, 1);
+}
+
+void falcon_fini_xmac(struct efx_nic *efx)
+{
+ /* Isolate the MAC - PHY */
+ falcon_deconfigure_mac_wrapper(efx);
+
+ /* Potentially power down the PHY */
+ efx->phy_op->fini(efx);
+}
+
+void falcon_update_stats_xmac(struct efx_nic *efx)
+{
+ struct efx_mac_stats *mac_stats = &efx->mac_stats;
+ int rc;
+
+ rc = falcon_dma_stats(efx, XgDmaDone_offset);
+ if (rc)
+ return;
+
+ /* Update MAC stats from DMAed values */
+ FALCON_STAT(efx, XgRxOctets, rx_bytes);
+ FALCON_STAT(efx, XgRxOctetsOK, rx_good_bytes);
+ FALCON_STAT(efx, XgRxPkts, rx_packets);
+ FALCON_STAT(efx, XgRxPktsOK, rx_good);
+ FALCON_STAT(efx, XgRxBroadcastPkts, rx_broadcast);
+ FALCON_STAT(efx, XgRxMulticastPkts, rx_multicast);
+ FALCON_STAT(efx, XgRxUnicastPkts, rx_unicast);
+ FALCON_STAT(efx, XgRxUndersizePkts, rx_lt64);
+ FALCON_STAT(efx, XgRxOversizePkts, rx_gtjumbo);
+ FALCON_STAT(efx, XgRxJabberPkts, rx_bad_gtjumbo);
+ FALCON_STAT(efx, XgRxUndersizeFCSerrorPkts, rx_bad_lt64);
+ FALCON_STAT(efx, XgRxDropEvents, rx_overflow);
+ FALCON_STAT(efx, XgRxFCSerrorPkts, rx_bad);
+ FALCON_STAT(efx, XgRxAlignError, rx_align_error);
+ FALCON_STAT(efx, XgRxSymbolError, rx_symbol_error);
+ FALCON_STAT(efx, XgRxInternalMACError, rx_internal_error);
+ FALCON_STAT(efx, XgRxControlPkts, rx_control);
+ FALCON_STAT(efx, XgRxPausePkts, rx_pause);
+ FALCON_STAT(efx, XgRxPkts64Octets, rx_64);
+ FALCON_STAT(efx, XgRxPkts65to127Octets, rx_65_to_127);
+ FALCON_STAT(efx, XgRxPkts128to255Octets, rx_128_to_255);
+ FALCON_STAT(efx, XgRxPkts256to511Octets, rx_256_to_511);
+ FALCON_STAT(efx, XgRxPkts512to1023Octets, rx_512_to_1023);
+ FALCON_STAT(efx, XgRxPkts1024to15xxOctets, rx_1024_to_15xx);
+ FALCON_STAT(efx, XgRxPkts15xxtoMaxOctets, rx_15xx_to_jumbo);
+ FALCON_STAT(efx, XgRxLengthError, rx_length_error);
+ FALCON_STAT(efx, XgTxPkts, tx_packets);
+ FALCON_STAT(efx, XgTxOctets, tx_bytes);
+ FALCON_STAT(efx, XgTxMulticastPkts, tx_multicast);
+ FALCON_STAT(efx, XgTxBroadcastPkts, tx_broadcast);
+ FALCON_STAT(efx, XgTxUnicastPkts, tx_unicast);
+ FALCON_STAT(efx, XgTxControlPkts, tx_control);
+ FALCON_STAT(efx, XgTxPausePkts, tx_pause);
+ FALCON_STAT(efx, XgTxPkts64Octets, tx_64);
+ FALCON_STAT(efx, XgTxPkts65to127Octets, tx_65_to_127);
+ FALCON_STAT(efx, XgTxPkts128to255Octets, tx_128_to_255);
+ FALCON_STAT(efx, XgTxPkts256to511Octets, tx_256_to_511);
+ FALCON_STAT(efx, XgTxPkts512to1023Octets, tx_512_to_1023);
+ FALCON_STAT(efx, XgTxPkts1024to15xxOctets, tx_1024_to_15xx);
+ FALCON_STAT(efx, XgTxPkts1519toMaxOctets, tx_15xx_to_jumbo);
+ FALCON_STAT(efx, XgTxUndersizePkts, tx_lt64);
+ FALCON_STAT(efx, XgTxOversizePkts, tx_gtjumbo);
+ FALCON_STAT(efx, XgTxNonTcpUdpPkt, tx_non_tcpudp);
+ FALCON_STAT(efx, XgTxMacSrcErrPkt, tx_mac_src_error);
+ FALCON_STAT(efx, XgTxIpSrcErrPkt, tx_ip_src_error);
+
+ /* Update derived statistics */
+ mac_stats->tx_good_bytes =
+ (mac_stats->tx_bytes - mac_stats->tx_bad_bytes);
+ mac_stats->rx_bad_bytes =
+ (mac_stats->rx_bytes - mac_stats->rx_good_bytes);
+}
+
+#define EFX_XAUI_RETRAIN_MAX 8
+
+int falcon_check_xmac(struct efx_nic *efx)
+{
+ unsigned xaui_link_ok;
+ int rc;
+
+ falcon_mask_status_intr(efx, 0);
+ xaui_link_ok = falcon_xaui_link_ok(efx);
+
+ if (EFX_WORKAROUND_5147(efx) && !xaui_link_ok)
+ (void) falcon_reset_xaui(efx);
+
+ /* Call the PHY check_hw routine */
+ rc = efx->phy_op->check_hw(efx);
+
+ /* Unmask interrupt if everything was (and still is) ok */
+ if (xaui_link_ok && efx->link_up)
+ falcon_mask_status_intr(efx, 1);
+
+ return rc;
+}
+
+/* Simulate a PHY event */
+void falcon_xmac_sim_phy_event(struct efx_nic *efx)
+{
+ efx_qword_t phy_event;
+
+ EFX_POPULATE_QWORD_2(phy_event,
+ EV_CODE, GLOBAL_EV_DECODE,
+ XG_PHY_INTR, 1);
+ falcon_generate_event(&efx->channel[0], &phy_event);
+}
+
+int falcon_xmac_get_settings(struct efx_nic *efx, struct ethtool_cmd *ecmd)
+{
+ mdio_clause45_get_settings(efx, ecmd);
+ ecmd->transceiver = XCVR_INTERNAL;
+ ecmd->phy_address = efx->mii.phy_id;
+ ecmd->autoneg = AUTONEG_DISABLE;
+ ecmd->duplex = DUPLEX_FULL;
+ return 0;
+}
+
+int falcon_xmac_set_settings(struct efx_nic *efx, struct ethtool_cmd *ecmd)
+{
+ if (ecmd->transceiver != XCVR_INTERNAL)
+ return -EINVAL;
+ if (ecmd->autoneg != AUTONEG_DISABLE)
+ return -EINVAL;
+ if (ecmd->duplex != DUPLEX_FULL)
+ return -EINVAL;
+
+ return mdio_clause45_set_settings(efx, ecmd);
+}
+
+
+int falcon_xmac_set_pause(struct efx_nic *efx, enum efx_fc_type flow_control)
+{
+ int reset;
+
+ if (flow_control & EFX_FC_AUTO) {
+ EFX_LOG(efx, "10G does not support flow control "
+ "autonegotiation\n");
+ return -EINVAL;
+ }
+
+ if ((flow_control & EFX_FC_TX) && !(flow_control & EFX_FC_RX))
+ return -EINVAL;
+
+ /* TX flow control may automatically turn itself off if the
+ * link partner (intermittently) stops responding to pause
+ * frames. There isn't any indication that this has happened,
+ * so the best we do is leave it up to the user to spot this
+ * and fix it by cycling transmit flow control on this end. */
+ reset = ((flow_control & EFX_FC_TX) &&
+ !(efx->flow_control & EFX_FC_TX));
+ if (EFX_WORKAROUND_11482(efx) && reset) {
+ if (FALCON_REV(efx) >= FALCON_REV_B0) {
+ /* Recover by resetting the EM block */
+ if (efx->link_up)
+ falcon_drain_tx_fifo(efx);
+ } else {
+ /* Schedule a reset to recover */
+ efx_schedule_reset(efx, RESET_TYPE_INVISIBLE);
+ }
+ }
+
+ efx->flow_control = flow_control;
+
+ return 0;
+}
diff --git a/drivers/net/sfc/gmii.h b/drivers/net/sfc/gmii.h
new file mode 100644
index 00000000000..d25bbd1297f
--- /dev/null
+++ b/drivers/net/sfc/gmii.h
@@ -0,0 +1,195 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_GMII_H
+#define EFX_GMII_H
+
+/*
+ * GMII interface
+ */
+
+#include <linux/mii.h>
+
+/* GMII registers, excluding registers already defined as MII
+ * registers in mii.h
+ */
+#define GMII_IER 0x12 /* Interrupt enable register */
+#define GMII_ISR 0x13 /* Interrupt status register */
+
+/* Interrupt enable register (GMII_IER) bit masks.
+ * No mask is defined here for bit 3.
+ */
+#define IER_ANEG_ERR 0x8000 /* Bit 15 - autonegotiation error */
+#define IER_SPEED_CHG 0x4000 /* Bit 14 - speed changed */
+#define IER_DUPLEX_CHG 0x2000 /* Bit 13 - duplex changed */
+#define IER_PAGE_RCVD 0x1000 /* Bit 12 - page received */
+#define IER_ANEG_DONE 0x0800 /* Bit 11 - autonegotiation complete */
+#define IER_LINK_CHG 0x0400 /* Bit 10 - link status changed */
+#define IER_SYM_ERR 0x0200 /* Bit 9 - symbol error */
+#define IER_FALSE_CARRIER 0x0100 /* Bit 8 - false carrier */
+#define IER_FIFO_ERR 0x0080 /* Bit 7 - FIFO over/underflow */
+#define IER_MDIX_CHG 0x0040 /* Bit 6 - MDI crossover changed */
+#define IER_DOWNSHIFT 0x0020 /* Bit 5 - downshift */
+#define IER_ENERGY 0x0010 /* Bit 4 - energy detect */
+#define IER_DTE_POWER 0x0004 /* Bit 2 - DTE power detect */
+#define IER_POLARITY_CHG 0x0002 /* Bit 1 - polarity changed */
+#define IER_JABBER 0x0001 /* Bit 0 - jabber */
+
+/* Interrupt status register (GMII_ISR) bit masks.
+ * Each ISR_xxx mask has the same value as the matching IER_xxx mask.
+ */
+#define ISR_ANEG_ERR 0x8000 /* Bit 15 - autonegotiation error */
+#define ISR_SPEED_CHG 0x4000 /* Bit 14 - speed changed */
+#define ISR_DUPLEX_CHG 0x2000 /* Bit 13 - duplex changed */
+#define ISR_PAGE_RCVD 0x1000 /* Bit 12 - page received */
+#define ISR_ANEG_DONE 0x0800 /* Bit 11 - autonegotiation complete */
+#define ISR_LINK_CHG 0x0400 /* Bit 10 - link status changed */
+#define ISR_SYM_ERR 0x0200 /* Bit 9 - symbol error */
+#define ISR_FALSE_CARRIER 0x0100 /* Bit 8 - false carrier */
+#define ISR_FIFO_ERR 0x0080 /* Bit 7 - FIFO over/underflow */
+#define ISR_MDIX_CHG 0x0040 /* Bit 6 - MDI crossover changed */
+#define ISR_DOWNSHIFT 0x0020 /* Bit 5 - downshift */
+#define ISR_ENERGY 0x0010 /* Bit 4 - energy detect */
+#define ISR_DTE_POWER 0x0004 /* Bit 2 - DTE power detect */
+#define ISR_POLARITY_CHG 0x0002 /* Bit 1 - polarity changed */
+#define ISR_JABBER 0x0001 /* Bit 0 - jabber */
+
+/* Logically extended advertisement register
+ *
+ * Most names simply alias the ADVERTISE_xxx constants from mii.h.
+ * The 1000 Mbit/s bits (ADVERTISE_1000FULL, ADVERTISE_1000HALF) are
+ * shifted left by 8, which is the same shift gmii_advertised() applies
+ * to the MII_CTRL1000 value before merging it with MII_ADVERTISE.
+ */
+#define GM_ADVERTISE_SLCT ADVERTISE_SLCT
+#define GM_ADVERTISE_CSMA ADVERTISE_CSMA
+#define GM_ADVERTISE_10HALF ADVERTISE_10HALF
+#define GM_ADVERTISE_1000XFULL ADVERTISE_1000XFULL
+#define GM_ADVERTISE_10FULL ADVERTISE_10FULL
+#define GM_ADVERTISE_1000XHALF ADVERTISE_1000XHALF
+#define GM_ADVERTISE_100HALF ADVERTISE_100HALF
+#define GM_ADVERTISE_1000XPAUSE ADVERTISE_1000XPAUSE
+#define GM_ADVERTISE_100FULL ADVERTISE_100FULL
+#define GM_ADVERTISE_1000XPSE_ASYM ADVERTISE_1000XPSE_ASYM
+#define GM_ADVERTISE_100BASE4 ADVERTISE_100BASE4
+#define GM_ADVERTISE_PAUSE_CAP ADVERTISE_PAUSE_CAP
+#define GM_ADVERTISE_PAUSE_ASYM ADVERTISE_PAUSE_ASYM
+#define GM_ADVERTISE_RESV ADVERTISE_RESV
+#define GM_ADVERTISE_RFAULT ADVERTISE_RFAULT
+#define GM_ADVERTISE_LPACK ADVERTISE_LPACK
+#define GM_ADVERTISE_NPAGE ADVERTISE_NPAGE
+/* Bits originating from MII_CTRL1000, relocated by the 8-bit shift */
+#define GM_ADVERTISE_1000FULL (ADVERTISE_1000FULL << 8)
+#define GM_ADVERTISE_1000HALF (ADVERTISE_1000HALF << 8)
+/* Convenience combinations */
+#define GM_ADVERTISE_1000 (GM_ADVERTISE_1000FULL | \
+ GM_ADVERTISE_1000HALF)
+#define GM_ADVERTISE_FULL (GM_ADVERTISE_1000FULL | \
+ ADVERTISE_FULL)
+#define GM_ADVERTISE_ALL (GM_ADVERTISE_1000FULL | \
+ GM_ADVERTISE_1000HALF | \
+ ADVERTISE_ALL)
+
+/* Logically extended link partner ability register
+ *
+ * Most names simply alias the LPA_xxx constants from mii.h.  The
+ * 1000 Mbit/s bits (LPA_1000FULL, LPA_1000HALF) are shifted left by 6,
+ * which is the same shift gmii_lpa() applies to the MII_STAT1000 value
+ * before merging it with MII_LPA.  The 10000 Mbit/s bits are plain
+ * literal values; nothing in this header reads them from a register.
+ */
+#define GM_LPA_SLCT LPA_SLCT
+#define GM_LPA_10HALF LPA_10HALF
+#define GM_LPA_1000XFULL LPA_1000XFULL
+#define GM_LPA_10FULL LPA_10FULL
+#define GM_LPA_1000XHALF LPA_1000XHALF
+#define GM_LPA_100HALF LPA_100HALF
+#define GM_LPA_1000XPAUSE LPA_1000XPAUSE
+#define GM_LPA_100FULL LPA_100FULL
+#define GM_LPA_1000XPAUSE_ASYM LPA_1000XPAUSE_ASYM
+#define GM_LPA_100BASE4 LPA_100BASE4
+#define GM_LPA_PAUSE_CAP LPA_PAUSE_CAP
+#define GM_LPA_PAUSE_ASYM LPA_PAUSE_ASYM
+#define GM_LPA_RESV LPA_RESV
+#define GM_LPA_RFAULT LPA_RFAULT
+#define GM_LPA_LPACK LPA_LPACK
+#define GM_LPA_NPAGE LPA_NPAGE
+/* Bits originating from MII_STAT1000, relocated by the 6-bit shift */
+#define GM_LPA_1000FULL (LPA_1000FULL << 6)
+#define GM_LPA_1000HALF (LPA_1000HALF << 6)
+#define GM_LPA_10000FULL 0x00040000
+#define GM_LPA_10000HALF 0x00080000
+/* Convenience combinations: all full-duplex bits, then per-speed groups */
+#define GM_LPA_DUPLEX (GM_LPA_1000FULL | GM_LPA_10000FULL \
+ | LPA_DUPLEX)
+#define GM_LPA_10 (LPA_10FULL | LPA_10HALF)
+#define GM_LPA_100 LPA_100
+#define GM_LPA_1000 (GM_LPA_1000FULL | GM_LPA_1000HALF)
+#define GM_LPA_10000 (GM_LPA_10000FULL | GM_LPA_10000HALF)
+
+/* Retrieve GMII autonegotiation advertised abilities
+ *
+ * Reads MII_ADVERTISE and MII_CTRL1000 from the PHY.  The MII_CTRL1000
+ * value is shifted left by 8 and masked down to the GM_ADVERTISE_1000
+ * bits, then ORed with the MII_ADVERTISE value.  The result can be
+ * tested against the GM_ADVERTISE_xxx constants.
+ */
+static inline unsigned int gmii_advertised(struct mii_if_info *gmii)
+{
+	unsigned int base_abilities;
+	unsigned int gigabit_abilities;
+
+	base_abilities = gmii->mdio_read(gmii->dev, gmii->phy_id,
+					 MII_ADVERTISE);
+	gigabit_abilities = gmii->mdio_read(gmii->dev, gmii->phy_id,
+					    MII_CTRL1000);
+
+	/* Move the 1000 Mbit/s bits to their extended positions */
+	gigabit_abilities <<= 8;
+	gigabit_abilities &= GM_ADVERTISE_1000;
+
+	return gigabit_abilities | base_abilities;
+}
+
+/* Retrieve GMII autonegotiation link partner abilities
+ *
+ * Reads MII_LPA and MII_STAT1000 from the PHY.  The MII_STAT1000 value
+ * is shifted left by 6 and masked down to the GM_LPA_1000 bits, then
+ * ORed with the MII_LPA value.  The result can be tested against the
+ * GM_LPA_xxx constants.
+ */
+static inline unsigned int gmii_lpa(struct mii_if_info *gmii)
+{
+	unsigned int base_abilities;
+	unsigned int gigabit_abilities;
+
+	base_abilities = gmii->mdio_read(gmii->dev, gmii->phy_id, MII_LPA);
+	gigabit_abilities = gmii->mdio_read(gmii->dev, gmii->phy_id,
+					    MII_STAT1000);
+
+	/* Move the 1000 Mbit/s bits to their extended positions */
+	gigabit_abilities <<= 6;
+	gigabit_abilities &= GM_LPA_1000;
+
+	return gigabit_abilities | base_abilities;
+}
+
+/* Calculate GMII autonegotiated link technology
+ *
+ * "negotiated" should be the result of gmii_advertised() logically
+ * ANDed with the result of gmii_lpa().
+ *
+ * The return value is "negotiated" with the 10/100/1000 speed and
+ * duplex bits replaced by a single bit naming the selected technology.
+ * For example, if both ends of the link are capable of both
+ * GM_LPA_1000FULL and GM_LPA_100FULL, GM_LPA_100FULL will be masked
+ * out.
+ */
+static inline unsigned int gmii_nway_result(unsigned int negotiated)
+{
+	unsigned int tech;
+
+	/* 1000 Mbit/s wins over anything mii_nway_result() can select */
+	if (negotiated & GM_LPA_1000FULL)
+		tech = GM_LPA_1000FULL;
+	else if (negotiated & GM_LPA_1000HALF)
+		tech = GM_LPA_1000HALF;
+	else
+		tech = mii_nway_result(negotiated);
+
+	/* Keep every bit that is not a 10/100/1000 speed/duplex bit */
+	negotiated &= ~(GM_LPA_10 | GM_LPA_100 | GM_LPA_1000);
+
+	return negotiated | tech;
+}
+
+/* Calculate GMII non-autonegotiated link technology
+ *
+ * Counterpart of gmii_nway_result() for when autonegotiation is
+ * disabled: maps the speed and duplex bits of a BMCR value onto the
+ * matching GM_LPA_xxx constant.  BMCR_SPEED1000 is tested before
+ * BMCR_SPEED100; if neither is set the result is a 10 Mbit/s mode.
+ */
+static inline unsigned int gmii_forced_result(unsigned int bmcr)
+{
+	int full = bmcr & BMCR_FULLDPLX;
+
+	if (bmcr & BMCR_SPEED1000)
+		return full ? GM_LPA_1000FULL : GM_LPA_1000HALF;
+	if (bmcr & BMCR_SPEED100)
+		return full ? GM_LPA_100FULL : GM_LPA_100HALF;
+	return full ? GM_LPA_10FULL : GM_LPA_10HALF;
+}
+
+#endif /* EFX_GMII_H */
diff --git a/drivers/net/sfc/i2c-direct.c b/drivers/net/sfc/i2c-direct.c
new file mode 100644
index 00000000000..b6c62d0ed9c
--- /dev/null
+++ b/drivers/net/sfc/i2c-direct.c
@@ -0,0 +1,381 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2005 Fen Systems Ltd.
+ * Copyright 2006-2008 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/delay.h>
+#include "net_driver.h"
+#include "i2c-direct.h"
+
+/*
+ * I2C data (SDA) and clock (SCL) line read/writes with appropriate
+ * delays.
+ */
+
+/* Record the requested SDA level in the interface, apply it through
+ * the setsda method, and wait op->udelay before and after the change.
+ */
+static inline void setsda(struct efx_i2c_interface *i2c, int state)
+{
+	const unsigned int bit_delay = i2c->op->udelay;
+
+	udelay(bit_delay);
+	i2c->sda = state;
+	i2c->op->setsda(i2c);
+	udelay(bit_delay);
+}
+
+/* Record the requested SCL level in the interface, apply it through
+ * the setscl method, and wait op->udelay before and after the change.
+ */
+static inline void setscl(struct efx_i2c_interface *i2c, int state)
+{
+	const unsigned int bit_delay = i2c->op->udelay;
+
+	udelay(bit_delay);
+	i2c->scl = state;
+	i2c->op->setscl(i2c);
+	udelay(bit_delay);
+}
+
+/* Sample the SDA line through the getsda method, waiting op->udelay
+ * before and after the read.  Returns the value the method reports.
+ */
+static inline int getsda(struct efx_i2c_interface *i2c)
+{
+	const unsigned int bit_delay = i2c->op->udelay;
+	int level;
+
+	udelay(bit_delay);
+	level = i2c->op->getsda(i2c);
+	udelay(bit_delay);
+
+	return level;
+}
+
+/* Sample the SCL line through the getscl method, waiting op->udelay
+ * before and after the read.  Returns the value the method reports.
+ */
+static inline int getscl(struct efx_i2c_interface *i2c)
+{
+	const unsigned int bit_delay = i2c->op->udelay;
+	int level;
+
+	udelay(bit_delay);
+	level = i2c->op->getscl(i2c);
+	udelay(bit_delay);
+
+	return level;
+}
+
+/*
+ * I2C low-level protocol operations
+ *
+ */
+
+/* Release the bus: drive SCL and then SDA high, and check that both
+ * lines read back as 1.
+ *
+ * NOTE(review): the EFX_*_ON_PARANOID macros are defined elsewhere;
+ * presumably they are only active in paranoid/debug builds - confirm.
+ */
+static inline void i2c_release(struct efx_i2c_interface *i2c)
+{
+ /* Checked on entry: both cached output states are expected to be
+ * high already */
+ EFX_WARN_ON_PARANOID(!i2c->scl);
+ EFX_WARN_ON_PARANOID(!i2c->sda);
+ /* Devices may time out if operations do not end */
+ setscl(i2c, 1);
+ setsda(i2c, 1);
+ /* Read the lines back: a 0 here means something else holds them low */
+ EFX_BUG_ON_PARANOID(getsda(i2c) != 1);
+ EFX_BUG_ON_PARANOID(getscl(i2c) != 1);
+}
+
+/* Generate a START condition: with SCL high, pull SDA low; then pull
+ * SCL low and let SDA go high again ready for the first bit.
+ */
+static inline void i2c_start(struct efx_i2c_interface *i2c)
+{
+ /* We may be restarting immediately after a {send,recv}_bit,
+ * so SCL will not necessarily already be high.
+ */
+ EFX_WARN_ON_PARANOID(!i2c->sda);
+ setscl(i2c, 1);
+ /* SDA falling while SCL is high is the START itself */
+ setsda(i2c, 0);
+ setscl(i2c, 0);
+ setsda(i2c, 1);
+}
+
+/* Transmit one bit: place @bit on SDA, pulse SCL high then low, and
+ * finally set SDA high again.  SCL is expected to be low on entry.
+ */
+static inline void i2c_send_bit(struct efx_i2c_interface *i2c, int bit)
+{
+ EFX_WARN_ON_PARANOID(i2c->scl != 0);
+ setsda(i2c, bit);
+ /* One clock pulse while SDA holds the bit value */
+ setscl(i2c, 1);
+ setscl(i2c, 0);
+ setsda(i2c, 1);
+}
+
+/* Receive one bit: raise SCL, sample SDA, then lower SCL again.
+ * Returns the sampled SDA value.  SCL is expected to be low and the
+ * SDA output high on entry.
+ */
+static inline int i2c_recv_bit(struct efx_i2c_interface *i2c)
+{
+ int bit;
+
+ EFX_WARN_ON_PARANOID(i2c->scl != 0);
+ EFX_WARN_ON_PARANOID(!i2c->sda);
+ setscl(i2c, 1);
+ /* SDA is sampled while SCL is high */
+ bit = getsda(i2c);
+ setscl(i2c, 0);
+ return bit;
+}
+
+/* Generate a STOP condition: with SDA low, raise SCL, then raise SDA.
+ * SCL is expected to be low on entry.
+ */
+static inline void i2c_stop(struct efx_i2c_interface *i2c)
+{
+ EFX_WARN_ON_PARANOID(i2c->scl != 0);
+ setsda(i2c, 0);
+ setscl(i2c, 1);
+ /* SDA rising while SCL is high is the STOP itself */
+ setsda(i2c, 1);
+}
+
+/*
+ * I2C mid-level protocol operations
+ *
+ */
+
+/* Transmit one byte, most significant bit first, then read the
+ * acknowledge bit from the slave.
+ *
+ * Returns 0 if the acknowledge bit was read as 0, -EIO otherwise.
+ */
+static int i2c_send_byte(struct efx_i2c_interface *i2c, u8 byte)
+{
+	u8 mask;
+
+	/* Walk a single-bit mask from bit 7 down to bit 0 */
+	for (mask = 0x80; mask != 0; mask >>= 1)
+		i2c_send_bit(i2c, (byte & mask) ? 1 : 0);
+
+	/* Ninth clock: a 0 on SDA is the slave's acknowledgement */
+	if (i2c_recv_bit(i2c) != 0)
+		return -EIO;
+
+	return 0;
+}
+
+/* Receive one byte, most significant bit first, then answer with an
+ * ACK (a 0 bit) if @ack is non-zero or a NACK (a 1 bit) if it is zero.
+ * Returns the received byte.
+ */
+static u8 i2c_recv_byte(struct efx_i2c_interface *i2c, int ack)
+{
+	u8 received = 0;
+	int bits_left = 8;
+
+	/* Shift each new bit in at the bottom */
+	while (bits_left-- > 0) {
+		received <<= 1;
+		received |= i2c_recv_bit(i2c);
+	}
+
+	/* Send ACK/NACK */
+	i2c_send_bit(i2c, !ack);
+
+	return received;
+}
+
+/* Build the command byte for a read: the device ID shifted left by
+ * one bit, with bit 0 set.
+ */
+static inline u8 i2c_read_cmd(u8 device_id)
+{
+	u8 cmd = device_id << 1;
+
+	return cmd | 1;
+}
+
+/* Build the command byte for a write: the device ID shifted left by
+ * one bit, with bit 0 clear.
+ */
+static inline u8 i2c_write_cmd(u8 device_id)
+{
+	u8 cmd = device_id << 1;
+
+	return cmd;
+}
+
+/* Check whether a device acknowledges its address.
+ *
+ * Returns -EFAULT without touching the bus if SDA or SCL reads low.
+ * Otherwise issues a START, sends the write command byte for
+ * @device_id, ends the transaction and returns the result of that
+ * send (0 if acknowledged, -EIO if not).
+ */
+int efx_i2c_check_presence(struct efx_i2c_interface *i2c, u8 device_id)
+{
+	int rc;
+
+	/* If someone is driving the bus low we just give up. */
+	if (getsda(i2c) == 0 || getscl(i2c) == 0) {
+		EFX_ERR(i2c->efx, "%s someone is holding the I2C bus low."
+			" Giving up.\n", __func__);
+		return -EFAULT;
+	}
+
+	/* Pretend to initiate a device write */
+	i2c_start(i2c);
+	rc = i2c_send_byte(i2c, i2c_write_cmd(device_id));
+
+	/* The transaction is ended whether or not the address was ACKed */
+	i2c_stop(i2c);
+	i2c_release(i2c);
+
+	return rc;
+}
+
+/* This performs a fast read of one or more consecutive bytes from an
+ * I2C device.  Not all devices support consecutive reads of more than
+ * one byte; for these devices use efx_i2c_read() instead.
+ *
+ * @data must point to at least @len bytes and @len must be at least 1.
+ *
+ * Returns 0 on success, -EINVAL if @data is NULL or @len is 0, or
+ * -EIO if the device fails to acknowledge a command or offset byte.
+ */
+int efx_i2c_fast_read(struct efx_i2c_interface *i2c,
+		      u8 device_id, u8 offset, u8 *data, unsigned int len)
+{
+	unsigned int i;
+	int rc;
+
+	EFX_WARN_ON_PARANOID(getsda(i2c) != 1);
+	EFX_WARN_ON_PARANOID(getscl(i2c) != 1);
+	EFX_WARN_ON_PARANOID(data == NULL);
+	EFX_WARN_ON_PARANOID(len < 1);
+
+	/* With len == 0 the unsigned "len - 1" below would wrap and the
+	 * read loop would run far past the end of the buffer, so refuse
+	 * before touching the bus.
+	 */
+	if (data == NULL || len < 1)
+		return -EINVAL;
+
+	/* Select device and starting offset */
+	i2c_start(i2c);
+	rc = i2c_send_byte(i2c, i2c_write_cmd(device_id));
+	if (rc)
+		goto out;
+	rc = i2c_send_byte(i2c, offset);
+	if (rc)
+		goto out;
+
+	/* Read data from device */
+	i2c_start(i2c);
+	rc = i2c_send_byte(i2c, i2c_read_cmd(device_id));
+	if (rc)
+		goto out;
+	for (i = 0; i < (len - 1); i++)
+		/* Read and acknowledge all but the last byte */
+		data[i] = i2c_recv_byte(i2c, 1);
+	/* Read last byte with no acknowledgement */
+	data[i] = i2c_recv_byte(i2c, 0);
+
+ out:
+	i2c_stop(i2c);
+	i2c_release(i2c);
+
+	return rc;
+}
+
+/* Write @len consecutive bytes to an I2C device in one transaction:
+ * START, write command byte, @offset, then the data bytes.  Devices
+ * that cannot take more than one byte per transaction need
+ * efx_i2c_write() instead.
+ *
+ * The transaction is always ended with STOP and a bus release.
+ * Returns 0 on success, or the error from the first byte that is not
+ * acknowledged.
+ */
+int efx_i2c_fast_write(struct efx_i2c_interface *i2c,
+		       u8 device_id, u8 offset,
+		       const u8 *data, unsigned int len)
+{
+	unsigned int sent;
+	int rc;
+
+	EFX_WARN_ON_PARANOID(getsda(i2c) != 1);
+	EFX_WARN_ON_PARANOID(getscl(i2c) != 1);
+	EFX_WARN_ON_PARANOID(len < 1);
+
+	/* Select device and starting offset */
+	i2c_start(i2c);
+	rc = i2c_send_byte(i2c, i2c_write_cmd(device_id));
+	if (rc == 0)
+		rc = i2c_send_byte(i2c, offset);
+
+	/* Write data to device, stopping at the first failure */
+	for (sent = 0; rc == 0 && sent < len; sent++)
+		rc = i2c_send_byte(i2c, data[sent]);
+
+	i2c_stop(i2c);
+	i2c_release(i2c);
+
+	return rc;
+}
+
+/* I2C byte-by-byte read: one single-byte efx_i2c_fast_read() per byte,
+ * at consecutive offsets (the u8 offset wraps after 0xff).  Stops at
+ * the first failure and returns its error code; returns 0 otherwise.
+ */
+int efx_i2c_read(struct efx_i2c_interface *i2c,
+		 u8 device_id, u8 offset, u8 *data, unsigned int len)
+{
+	unsigned int done;
+	int rc = 0;
+
+	for (done = 0; rc == 0 && done < len; done++)
+		rc = efx_i2c_fast_read(i2c, device_id, (u8)(offset + done),
+				       data + done, 1);
+
+	return rc;
+}
+
+/* I2C byte-by-byte write: one single-byte efx_i2c_fast_write() per
+ * byte, at consecutive offsets (the u8 offset wraps after 0xff), with
+ * a delay of op->mdelay after each successful write.  Stops at the
+ * first failure and returns its error code; returns 0 otherwise.
+ */
+int efx_i2c_write(struct efx_i2c_interface *i2c,
+		  u8 device_id, u8 offset, const u8 *data, unsigned int len)
+{
+	unsigned int done;
+	int rc;
+
+	for (done = 0; done < len; done++) {
+		rc = efx_i2c_fast_write(i2c, device_id, (u8)(offset + done),
+					data + done, 1);
+		if (rc)
+			return rc;
+		mdelay(i2c->op->mdelay);
+	}
+
+	return 0;
+}
+
+
+/* This is just a slightly neater wrapper round efx_i2c_fast_write
+ * in the case where the target doesn't take an offset: the first byte
+ * of @data is sent in the offset position and the remaining @len - 1
+ * bytes as data.
+ *
+ * Returns -EINVAL if @data is NULL or @len is 0, otherwise the result
+ * of efx_i2c_fast_write().
+ */
+int efx_i2c_send_bytes(struct efx_i2c_interface *i2c,
+		       u8 device_id, const u8 *data, unsigned int len)
+{
+	/* With len == 0 there is no data[0] to send and "len - 1" would
+	 * wrap round to a huge unsigned count.
+	 */
+	if (data == NULL || len < 1)
+		return -EINVAL;
+
+	return efx_i2c_fast_write(i2c, device_id, data[0], data + 1, len - 1);
+}
+
+/* I2C receiving of bytes - does not send an offset byte
+ *
+ * @bytes must point to at least @len bytes and @len must be at least 1.
+ *
+ * Returns 0 on success, -EINVAL if @bytes is NULL or @len is 0, or
+ * -EIO if the device does not acknowledge the read command.
+ */
+int efx_i2c_recv_bytes(struct efx_i2c_interface *i2c, u8 device_id,
+		       u8 *bytes, unsigned int len)
+{
+	unsigned int i;
+	int rc;
+
+	EFX_WARN_ON_PARANOID(getsda(i2c) != 1);
+	EFX_WARN_ON_PARANOID(getscl(i2c) != 1);
+	EFX_WARN_ON_PARANOID(len < 1);
+
+	/* With len == 0 the unsigned "len - 1" below would wrap and the
+	 * read loop would run far past the end of the buffer, so refuse
+	 * before touching the bus.
+	 */
+	if (bytes == NULL || len < 1)
+		return -EINVAL;
+
+	/* Select device */
+	i2c_start(i2c);
+
+	/* Read data from device */
+	rc = i2c_send_byte(i2c, i2c_read_cmd(device_id));
+	if (rc)
+		goto out;
+
+	for (i = 0; i < (len - 1); i++)
+		/* Read and acknowledge all but the last byte */
+		bytes[i] = i2c_recv_byte(i2c, 1);
+	/* Read last byte with no acknowledgement */
+	bytes[i] = i2c_recv_byte(i2c, 0);
+
+ out:
+	i2c_stop(i2c);
+	i2c_release(i2c);
+
+	return rc;
+}
+
+/* SMBus and some I2C devices will time out if the I2C clock is
+ * held low for too long. This is most likely to happen in virtualised
+ * systems (when the entire domain is descheduled) but could in
+ * principle happen due to preemption on any busy system (and given the
+ * potential length of an I2C operation turning preemption off is not
+ * a sensible option). The following functions deal with the failure by
+ * retrying up to a fixed number of times.
+ */
+
+#define I2C_MAX_RETRIES (10)
+
+/* The timeout problem will result in -EIO. If the wrapped function
+ * returns any other error, pass this up and do not retry. */
+#define RETRY_WRAPPER(_f) \
+ int retries = I2C_MAX_RETRIES; \
+ int rc; \
+ while (retries) { \
+ rc = _f; \
+ if (rc != -EIO) \
+ return rc; \
+ retries--; \
+ } \
+ return rc; \
+
+/* efx_i2c_check_presence() with retries: the call is repeated, up to
+ * I2C_MAX_RETRIES times in total, for as long as it returns -EIO.
+ * RETRY_WRAPPER supplies the whole body, including the return.
+ */
+int efx_i2c_check_presence_retry(struct efx_i2c_interface *i2c, u8 device_id)
+{
+ RETRY_WRAPPER(efx_i2c_check_presence(i2c, device_id))
+}
+
+/* efx_i2c_read() with retries: the whole read is repeated, up to
+ * I2C_MAX_RETRIES times in total, for as long as it returns -EIO.
+ * RETRY_WRAPPER supplies the whole body, including the return.
+ */
+int efx_i2c_read_retry(struct efx_i2c_interface *i2c,
+ u8 device_id, u8 offset, u8 *data, unsigned int len)
+{
+ RETRY_WRAPPER(efx_i2c_read(i2c, device_id, offset, data, len))
+}
+
+/* efx_i2c_write() with retries: the whole write is repeated, up to
+ * I2C_MAX_RETRIES times in total, for as long as it returns -EIO.
+ * RETRY_WRAPPER supplies the whole body, including the return.
+ */
+int efx_i2c_write_retry(struct efx_i2c_interface *i2c,
+ u8 device_id, u8 offset, const u8 *data, unsigned int len)
+{
+ RETRY_WRAPPER(efx_i2c_write(i2c, device_id, offset, data, len))
+}
diff --git a/drivers/net/sfc/i2c-direct.h b/drivers/net/sfc/i2c-direct.h
new file mode 100644
index 00000000000..291e561071f
--- /dev/null
+++ b/drivers/net/sfc/i2c-direct.h
@@ -0,0 +1,91 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2005 Fen Systems Ltd.
+ * Copyright 2006 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_I2C_DIRECT_H
+#define EFX_I2C_DIRECT_H
+
+#include "net_driver.h"
+
+/*
+ * Direct control of an I2C bus
+ */
+
+struct efx_i2c_interface;
+
+/**
+ * struct efx_i2c_bit_operations - I2C bus direct control methods
+ *
+ * I2C bus direct control methods.
+ *
+ * @setsda: Set state of SDA line. The method receives only the
+ * interface; i2c-direct.c stores the requested level in the
+ * interface's @sda field before calling it.
+ * @setscl: Set state of SCL line. As for @setsda, the requested level
+ * is stored in the interface's @scl field before the call.
+ * @getsda: Get state of SDA line
+ * @getscl: Get state of SCL line
+ * @udelay: Delay between each bit operation; passed to udelay(), so in
+ * microseconds
+ * @mdelay: Delay between each byte write; passed to mdelay() by
+ * efx_i2c_write(), so in milliseconds
+ */
+struct efx_i2c_bit_operations {
+ void (*setsda) (struct efx_i2c_interface *i2c);
+ void (*setscl) (struct efx_i2c_interface *i2c);
+ int (*getsda) (struct efx_i2c_interface *i2c);
+ int (*getscl) (struct efx_i2c_interface *i2c);
+ unsigned int udelay;
+ unsigned int mdelay;
+};
+
+/**
+ * struct efx_i2c_interface - an I2C interface
+ *
+ * An I2C interface.
+ *
+ * @efx: Attached Efx NIC
+ * @op: I2C bus control methods
+ * @sda: Current output state of SDA line (single-bit field, written by
+ * i2c-direct.c just before @op->setsda is called)
+ * @scl: Current output state of SCL line (single-bit field, written by
+ * i2c-direct.c just before @op->setscl is called)
+ */
+struct efx_i2c_interface {
+ struct efx_nic *efx;
+ struct efx_i2c_bit_operations *op;
+ unsigned int sda:1;
+ unsigned int scl:1;
+};
+
+extern int efx_i2c_check_presence(struct efx_i2c_interface *i2c, u8 device_id);
+extern int efx_i2c_fast_read(struct efx_i2c_interface *i2c,
+ u8 device_id, u8 offset,
+ u8 *data, unsigned int len);
+extern int efx_i2c_fast_write(struct efx_i2c_interface *i2c,
+ u8 device_id, u8 offset,
+ const u8 *data, unsigned int len);
+extern int efx_i2c_read(struct efx_i2c_interface *i2c,
+ u8 device_id, u8 offset, u8 *data, unsigned int len);
+extern int efx_i2c_write(struct efx_i2c_interface *i2c,
+ u8 device_id, u8 offset,
+ const u8 *data, unsigned int len);
+
+extern int efx_i2c_send_bytes(struct efx_i2c_interface *i2c, u8 device_id,
+ const u8 *bytes, unsigned int len);
+
+extern int efx_i2c_recv_bytes(struct efx_i2c_interface *i2c, u8 device_id,
+ u8 *bytes, unsigned int len);
+
+
+/* Versions of the API that retry on failure. */
+extern int efx_i2c_check_presence_retry(struct efx_i2c_interface *i2c,
+ u8 device_id);
+
+extern int efx_i2c_read_retry(struct efx_i2c_interface *i2c,
+ u8 device_id, u8 offset, u8 *data, unsigned int len);
+
+extern int efx_i2c_write_retry(struct efx_i2c_interface *i2c,
+ u8 device_id, u8 offset,
+ const u8 *data, unsigned int len);
+
+#endif /* EFX_I2C_DIRECT_H */
diff --git a/drivers/net/sfc/mac.h b/drivers/net/sfc/mac.h
new file mode 100644
index 00000000000..edd07d4dee1
--- /dev/null
+++ b/drivers/net/sfc/mac.h
@@ -0,0 +1,33 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2007 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_MAC_H
+#define EFX_MAC_H
+
+#include "net_driver.h"
+
+extern void falcon_xmac_writel(struct efx_nic *efx,
+ efx_dword_t *value, unsigned int mac_reg);
+extern void falcon_xmac_readl(struct efx_nic *efx,
+ efx_dword_t *value, unsigned int mac_reg);
+extern int falcon_init_xmac(struct efx_nic *efx);
+extern void falcon_reconfigure_xmac(struct efx_nic *efx);
+extern void falcon_update_stats_xmac(struct efx_nic *efx);
+extern void falcon_fini_xmac(struct efx_nic *efx);
+extern int falcon_check_xmac(struct efx_nic *efx);
+extern void falcon_xmac_sim_phy_event(struct efx_nic *efx);
+extern int falcon_xmac_get_settings(struct efx_nic *efx,
+ struct ethtool_cmd *ecmd);
+extern int falcon_xmac_set_settings(struct efx_nic *efx,
+ struct ethtool_cmd *ecmd);
+extern int falcon_xmac_set_pause(struct efx_nic *efx,
+ enum efx_fc_type pause_params);
+
+#endif
diff --git a/drivers/net/sfc/mdio_10g.c b/drivers/net/sfc/mdio_10g.c
new file mode 100644
index 00000000000..dc06bb0aa57
--- /dev/null
+++ b/drivers/net/sfc/mdio_10g.c
@@ -0,0 +1,282 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2006-2008 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+/*
+ * Useful functions for working with MDIO clause 45 PHYs
+ */
+#include <linux/types.h>
+#include <linux/ethtool.h>
+#include <linux/delay.h>
+#include "net_driver.h"
+#include "mdio_10g.h"
+#include "boards.h"
+
+/* Reset one MMD and wait for the reset to complete.
+ *
+ * Sets the reset bit in the MMD's CTRL1 register, then polls the
+ * register up to @spins times, sleeping @spintime milliseconds before
+ * each poll, until the bit reads back clear.
+ *
+ * Returns the number of polls left over (>= 0) on success, or
+ * -ETIMEDOUT if the reset bit is still set after the last poll.  The
+ * outcome is taken from the reset bit itself, so a reset that
+ * completes on the final poll counts as success.
+ */
+int mdio_clause45_reset_mmd(struct efx_nic *port, int mmd,
+			    int spins, int spintime)
+{
+	const u32 reset_bit = 1 << MDIO_MMDREG_CTRL1_RESET_LBN;
+	u32 ctrl;
+	int phy_id = port->mii.phy_id;
+
+	/* Catch callers passing values in the wrong units (or just silly) */
+	EFX_BUG_ON_PARANOID(spins * spintime >= 5000);
+
+	mdio_clause45_write(port, phy_id, mmd, MDIO_MMDREG_CTRL1, reset_bit);
+
+	/* Wait for the reset bit to clear.  "spins > 0" rather than
+	 * "spins" so that a zero or negative count still terminates
+	 * after a single poll.
+	 */
+	do {
+		msleep(spintime);
+		ctrl = mdio_clause45_read(port, phy_id, mmd, MDIO_MMDREG_CTRL1);
+		spins--;
+	} while (spins > 0 && (ctrl & reset_bit));
+
+	if (ctrl & reset_bit)
+		return -ETIMEDOUT;
+
+	return spins > 0 ? spins : 0;
+}
+
+/* Check that a single MMD is responding and not reporting a fault.
+ *
+ * Returns -EIO if the "present" field of STATUS2 does not hold the
+ * expected value, or if the STATUS1 fault bit is set and @fault_fatal
+ * is non-zero.  A fault with @fault_fatal zero is only logged.
+ * Returns 0 otherwise.
+ */
+static int mdio_clause45_check_mmd(struct efx_nic *efx, int mmd,
+				   int fault_fatal)
+{
+	const int phy_id = efx->mii.phy_id;
+	int present;
+	int status;
+
+	/* Read MMD STATUS2 to check it is responding. */
+	status = mdio_clause45_read(efx, phy_id, mmd, MDIO_MMDREG_STAT2);
+	present = (status >> MDIO_MMDREG_STAT2_PRESENT_LBN) &
+		  ((1 << MDIO_MMDREG_STAT2_PRESENT_WIDTH) - 1);
+	if (present != MDIO_MMDREG_STAT2_PRESENT_VAL) {
+		EFX_ERR(efx, "PHY MMD %d not responding.\n", mmd);
+		return -EIO;
+	}
+
+	/* Read MMD STATUS 1 to check for fault. */
+	status = mdio_clause45_read(efx, phy_id, mmd, MDIO_MMDREG_STAT1);
+	if ((status & (1 << MDIO_MMDREG_STAT1_FAULT_LBN)) == 0)
+		return 0;
+
+	if (!fault_fatal) {
+		EFX_LOG(efx, "PHY MMD %d reporting status"
+			" %x (expected)\n", mmd, status);
+		return 0;
+	}
+
+	EFX_ERR(efx, "PHY MMD %d reporting fatal"
+		" fault: status %x\n", mmd, status);
+	return -EIO;
+}
+
+/* This ought to be ridiculous overkill. We expect it to fail rarely */
+#define MDIO45_RESET_TIME 1000 /* ms */
+#define MDIO45_RESET_ITERS 100
+
+/* Wait for every MMD selected in @mmd_mask (bit N selects MMD N) to
+ * leave reset.
+ *
+ * Polls CTRL1 of each selected MMD up to MDIO45_RESET_ITERS times,
+ * sleeping between rounds.  Returns 0 once no selected MMD has its
+ * reset bit set, -EIO if a register read returns a negative value, or
+ * -ETIMEDOUT if some MMD is still in reset after the last round.
+ */
+int mdio_clause45_wait_reset_mmds(struct efx_nic *efx,
+				  unsigned int mmd_mask)
+{
+	const int spintime = MDIO45_RESET_TIME / MDIO45_RESET_ITERS;
+	int in_reset;
+	int tries;
+
+	for (tries = MDIO45_RESET_ITERS; tries; tries--) {
+		int mask;
+		int mmd;
+
+		in_reset = 0;
+		for (mmd = 0, mask = mmd_mask; mask; mask >>= 1, mmd++) {
+			int stat;
+
+			if (!(mask & 1))
+				continue;
+
+			stat = mdio_clause45_read(efx, efx->mii.phy_id,
+						  mmd, MDIO_MMDREG_CTRL1);
+			if (stat < 0) {
+				EFX_ERR(efx, "failed to read status of"
+					" MMD %d\n", mmd);
+				return -EIO;
+			}
+			if (stat & (1 << MDIO_MMDREG_CTRL1_RESET_LBN))
+				in_reset |= (1 << mmd);
+		}
+		if (!in_reset)
+			break;
+		msleep(spintime);
+	}
+
+	if (in_reset == 0)
+		return 0;
+
+	EFX_ERR(efx, "not all MMDs came out of reset in time."
+		" MMDs still in reset: %x\n", in_reset);
+	return -ETIMEDOUT;
+}
+
+/* Check that all MMDs selected in @mmd_mask are present and healthy.
+ *
+ * Bit N of @mmd_mask selects MMD N; the matching bit of @fatal_mask
+ * says whether a fault reported by that MMD is fatal.
+ *
+ * Returns -EIO if the devices-present register cannot be read or an
+ * individual MMD check fails, -ENODEV if a selected MMD is not listed
+ * as present, and 0 otherwise.
+ */
+int mdio_clause45_check_mmds(struct efx_nic *efx,
+			     unsigned int mmd_mask, unsigned int fatal_mask)
+{
+	int devices;
+	int probe_mmd;
+	int mmd;
+
+	/* Historically we have probed the PHYXS to find out what devices are
+	 * present, but that doesn't work so well if the PHYXS isn't expected
+	 * to exist; if so just find the first item in the list supplied. */
+	if (mmd_mask & MDIO_MMDREG_DEVS0_PHYXS)
+		probe_mmd = MDIO_MMD_PHYXS;
+	else
+		probe_mmd = __ffs(mmd_mask);
+	devices = mdio_clause45_read(efx, efx->mii.phy_id,
+				     probe_mmd, MDIO_MMDREG_DEVS0);
+
+	/* Check all the expected MMDs are present */
+	if (devices < 0) {
+		EFX_ERR(efx, "failed to read devices present\n");
+		return -EIO;
+	}
+	if ((devices & mmd_mask) != mmd_mask) {
+		EFX_ERR(efx, "required MMDs not present: got %x, "
+			"wanted %x\n", devices, mmd_mask);
+		return -ENODEV;
+	}
+	EFX_TRACE(efx, "Devices present: %x\n", devices);
+
+	/* Check all required MMDs are responding and happy. */
+	for (mmd = 0; mmd_mask; mmd++, mmd_mask >>= 1, fatal_mask >>= 1) {
+		if (!(mmd_mask & 1))
+			continue;
+		if (mdio_clause45_check_mmd(efx, mmd, fatal_mask & 1))
+			return -EIO;
+	}
+
+	return 0;
+}
+
+/* Report whether every MMD selected in @mmd_mask (bit N selects
+ * MMD N) has its STATUS1 link bit set.
+ *
+ * Returns 1 if all selected MMDs report link up (or none is selected),
+ * 0 otherwise.  Every selected MMD is read even after one has been
+ * found down.
+ */
+int mdio_clause45_links_ok(struct efx_nic *efx, unsigned int mmd_mask)
+{
+	const int phy_id = efx->mii.phy_id;
+	int all_up = 1;
+	int status;
+	int mmd;
+
+	for (mmd = 0; mmd_mask; mmd++, mmd_mask >>= 1) {
+		if (!(mmd_mask & 1))
+			continue;
+
+		/* Double reads because link state is latched, and a
+		 * read moves the current state into the register */
+		status = mdio_clause45_read(efx, phy_id,
+					    mmd, MDIO_MMDREG_STAT1);
+		status = mdio_clause45_read(efx, phy_id,
+					    mmd, MDIO_MMDREG_STAT1);
+
+		if (!(status & (1 << MDIO_MMDREG_STAT1_LINK_LBN)))
+			all_up = 0;
+	}
+
+	return all_up;
+}
+
+/**
+ * mdio_clause45_get_settings - Read (some of) the PHY settings over MDIO.
+ * @efx: Efx NIC
+ * @ecmd: Buffer for settings
+ *
+ * On return the 'port', 'speed', 'supported' and 'advertising' fields of
+ * ecmd have been filled out based on the PMA type.  The twisted-pair
+ * PMA types report TP consistently in 'port', 'supported' and
+ * 'advertising'; everything else is reported as fibre.
+ */
+void mdio_clause45_get_settings(struct efx_nic *efx,
+				struct ethtool_cmd *ecmd)
+{
+	int pma_type;
+
+	/* If no PMA is present we are presumably talking something XAUI-ish
+	 * like CX4. Which we report as FIBRE (see below) */
+	if ((efx->phy_op->mmds & DEV_PRESENT_BIT(MDIO_MMD_PMAPMD)) == 0) {
+		ecmd->speed = SPEED_10000;
+		ecmd->port = PORT_FIBRE;
+		ecmd->supported = SUPPORTED_FIBRE;
+		ecmd->advertising = ADVERTISED_FIBRE;
+		return;
+	}
+
+	pma_type = mdio_clause45_read(efx, efx->mii.phy_id,
+				      MDIO_MMD_PMAPMD, MDIO_MMDREG_CTRL2);
+	pma_type &= MDIO_PMAPMD_CTRL2_TYPE_MASK;
+
+	switch (pma_type) {
+	/* 10G Base-T */
+	case MDIO_PMAPMD_CTRL2_10G_BT:
+		ecmd->speed = SPEED_10000;
+		ecmd->port = PORT_TP;
+		ecmd->supported = SUPPORTED_TP | SUPPORTED_10000baseT_Full;
+		ecmd->advertising = (ADVERTISED_TP
+				     | ADVERTISED_10000baseT_Full);
+		break;
+	case MDIO_PMAPMD_CTRL2_1G_BT:
+		ecmd->speed = SPEED_1000;
+		ecmd->port = PORT_TP;
+		ecmd->supported = SUPPORTED_TP | SUPPORTED_1000baseT_Full;
+		ecmd->advertising = (ADVERTISED_TP
+				     | ADVERTISED_1000baseT_Full);
+		break;
+	case MDIO_PMAPMD_CTRL2_100_BT:
+		ecmd->speed = SPEED_100;
+		ecmd->port = PORT_TP;
+		ecmd->supported = SUPPORTED_TP | SUPPORTED_100baseT_Full;
+		ecmd->advertising = (ADVERTISED_TP
+				     | ADVERTISED_100baseT_Full);
+		break;
+	case MDIO_PMAPMD_CTRL2_10_BT:
+		ecmd->speed = SPEED_10;
+		ecmd->port = PORT_TP;
+		ecmd->supported = SUPPORTED_TP | SUPPORTED_10baseT_Full;
+		ecmd->advertising = ADVERTISED_TP | ADVERTISED_10baseT_Full;
+		break;
+	/* We represent CX4 as fibre in the absence of anything
+	 * better.  All the other defined modes are flavours of
+	 * 10G optical */
+	case MDIO_PMAPMD_CTRL2_10G_CX4:
+	default:
+		ecmd->speed = SPEED_10000;
+		ecmd->port = PORT_FIBRE;
+		ecmd->supported = SUPPORTED_FIBRE;
+		ecmd->advertising = ADVERTISED_FIBRE;
+		break;
+	}
+}
+
+/**
+ * mdio_clause45_set_settings - Set (some of) the PHY settings over MDIO.
+ * @efx: Efx NIC
+ * @ecmd: New settings
+ *
+ * Nothing is written to the PHY.  The request is compared with what
+ * mdio_clause45_get_settings() currently reports: 0 is returned if
+ * 'port', 'speed', 'supported' and 'advertising' all match, and
+ * -EOPNOTSUPP if any of them differs, as these cannot be changed on
+ * any currently supported PHY.
+ */
+int mdio_clause45_set_settings(struct efx_nic *efx,
+			       struct ethtool_cmd *ecmd)
+{
+	struct ethtool_cmd current_cmd;
+
+	mdio_clause45_get_settings(efx, &current_cmd);
+
+	/* None of the current PHYs support more than one mode
+	 * of operation (and only 10GBT ever will), so keep things
+	 * simple for now */
+	if (ecmd->speed != current_cmd.speed ||
+	    ecmd->port != current_cmd.port ||
+	    ecmd->supported != current_cmd.supported ||
+	    ecmd->advertising != current_cmd.advertising)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
diff --git a/drivers/net/sfc/mdio_10g.h b/drivers/net/sfc/mdio_10g.h
new file mode 100644
index 00000000000..2214b6d820a
--- /dev/null
+++ b/drivers/net/sfc/mdio_10g.h
@@ -0,0 +1,232 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2006-2008 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_MDIO_10G_H
+#define EFX_MDIO_10G_H
+
+/*
+ * Definitions needed for doing 10G MDIO as specified in clause 45
+ * MDIO, which do not appear in Linux yet. Also some helper functions.
+ */
+
+#include "efx.h"
+#include "boards.h"
+
+/* Numbering of the MDIO Manageable Devices (MMDs).
+ * These are the values passed as the 'dev'/'mmd' argument of the
+ * mdio_clause45_*() helpers in this header. */
+/* Physical Medium Attachment/ Physical Medium Dependent sublayer */
+#define MDIO_MMD_PMAPMD (1)
+/* WAN Interface Sublayer */
+#define MDIO_MMD_WIS (2)
+/* Physical Coding Sublayer */
+#define MDIO_MMD_PCS (3)
+/* PHY Extender Sublayer */
+#define MDIO_MMD_PHYXS (4)
+/* Extender Sublayer */
+#define MDIO_MMD_DTEXS (5)
+/* Transmission convergence */
+#define MDIO_MMD_TC (6)
+/* Auto negotiation */
+#define MDIO_MMD_AN (7)
+
+/* Generic register locations.
+ * These addresses are not tied to one MMD: mdio_clause45_read_id()
+ * reads IDHI/IDLOW from whichever MMD it is given. */
+#define MDIO_MMDREG_CTRL1 (0)
+#define MDIO_MMDREG_STAT1 (1)
+#define MDIO_MMDREG_IDHI (2)
+#define MDIO_MMDREG_IDLOW (3)
+#define MDIO_MMDREG_SPEED (4)
+#define MDIO_MMDREG_DEVS0 (5)
+#define MDIO_MMDREG_DEVS1 (6)
+#define MDIO_MMDREG_CTRL2 (7)
+#define MDIO_MMDREG_STAT2 (8)
+
+/* Bits in MMDREG_CTRL1.
+ * Naming used from here on: <field>_LBN is the bit position of the field
+ * within its register (it is used as a shift count elsewhere in this
+ * header).  NOTE(review): <field>_WIDTH is presumably the field size in
+ * bits - confirm against bitfield.h. */
+/* Reset */
+#define MDIO_MMDREG_CTRL1_RESET_LBN (15)
+#define MDIO_MMDREG_CTRL1_RESET_WIDTH (1)
+
+/* Bits in MMDREG_STAT1 */
+#define MDIO_MMDREG_STAT1_FAULT_LBN (7)
+#define MDIO_MMDREG_STAT1_FAULT_WIDTH (1)
+/* Link state */
+#define MDIO_MMDREG_STAT1_LINK_LBN (2)
+#define MDIO_MMDREG_STAT1_LINK_WIDTH (1)
+
+/* Bits in ID reg.
+ * The argument is a 32-bit ID with IDHI in the upper 16 bits and IDLOW in
+ * the lower 16, as assembled by mdio_clause45_read_id().  The argument is
+ * parenthesised in each expansion so that an expression such as
+ * MDIO_ID_REV(a | b) is evaluated as a whole before the field is
+ * extracted. */
+#define MDIO_ID_REV(_id32) ((_id32) & 0xf)
+#define MDIO_ID_MODEL(_id32) (((_id32) >> 4) & 0x3f)
+#define MDIO_ID_OUI(_id32) ((_id32) >> 10)
+
+/* Bits in MMDREG_DEVS0. Someone thoughtfully laid things out
+ * so the 'bit present' bit number of an MMD is the number of
+ * that MMD.  The argument is parenthesised so that an expression
+ * (not just a plain constant) can be passed safely. */
+#define DEV_PRESENT_BIT(_b) (1 << (_b))
+
+#define MDIO_MMDREG_DEVS0_PHYXS DEV_PRESENT_BIT(MDIO_MMD_PHYXS)
+#define MDIO_MMDREG_DEVS0_PCS DEV_PRESENT_BIT(MDIO_MMD_PCS)
+#define MDIO_MMDREG_DEVS0_PMAPMD DEV_PRESENT_BIT(MDIO_MMD_PMAPMD)
+
+/* Bits in MMDREG_STAT2 */
+#define MDIO_MMDREG_STAT2_PRESENT_VAL (2)
+#define MDIO_MMDREG_STAT2_PRESENT_LBN (14)
+#define MDIO_MMDREG_STAT2_PRESENT_WIDTH (2)
+
+/* PMA type (4 bits, selected by MDIO_PMAPMD_CTRL2_TYPE_MASK).
+ * Codes 0x8, 0xa, 0xb and 0xd have no definition here and are marked
+ * reserved below. */
+#define MDIO_PMAPMD_CTRL2_10G_CX4 (0x0)
+#define MDIO_PMAPMD_CTRL2_10G_EW (0x1)
+#define MDIO_PMAPMD_CTRL2_10G_LW (0x2)
+#define MDIO_PMAPMD_CTRL2_10G_SW (0x3)
+#define MDIO_PMAPMD_CTRL2_10G_LX4 (0x4)
+#define MDIO_PMAPMD_CTRL2_10G_ER (0x5)
+#define MDIO_PMAPMD_CTRL2_10G_LR (0x6)
+#define MDIO_PMAPMD_CTRL2_10G_SR (0x7)
+/* 0x8: Reserved */
+#define MDIO_PMAPMD_CTRL2_10G_BT (0x9)
+/* 0xa: Reserved */
+/* 0xb: Reserved */
+#define MDIO_PMAPMD_CTRL2_1G_BT (0xc)
+/* 0xd: Reserved */
+#define MDIO_PMAPMD_CTRL2_100_BT (0xe)
+#define MDIO_PMAPMD_CTRL2_10_BT (0xf)
+#define MDIO_PMAPMD_CTRL2_TYPE_MASK (0xf)
+
+/* PHY XGXS lane state */
+#define MDIO_PHYXS_LANE_STATE (0x18)
+#define MDIO_PHYXS_LANE_ALIGNED_LBN (12)
+
+/* AN registers */
+#define MDIO_AN_STATUS (1)
+#define MDIO_AN_STATUS_XNP_LBN (7)
+#define MDIO_AN_STATUS_PAGE_LBN (6)
+#define MDIO_AN_STATUS_AN_DONE_LBN (5)
+#define MDIO_AN_STATUS_LP_AN_CAP_LBN (0)
+
+#define MDIO_AN_10GBT_STATUS (33)
+#define MDIO_AN_10GBT_STATUS_MS_FLT_LBN (15) /* MASTER/SLAVE config fault */
+#define MDIO_AN_10GBT_STATUS_MS_LBN (14) /* MASTER/SLAVE config */
+#define MDIO_AN_10GBT_STATUS_LOC_OK_LBN (13) /* Local OK */
+#define MDIO_AN_10GBT_STATUS_REM_OK_LBN (12) /* Remote OK */
+#define MDIO_AN_10GBT_STATUS_LP_10G_LBN (11) /* Link partner is 10GBT capable */
+#define MDIO_AN_10GBT_STATUS_LP_LTA_LBN (10) /* LP loop timing ability */
+#define MDIO_AN_10GBT_STATUS_LP_TRR_LBN (9) /* LP Training Reset Request */
+
+
+/* Packing of the prt and dev arguments of clause 45 style MDIO into a
+ * single int so they can be passed into the mdio_read/write functions
+ * that currently exist. Note that as Falcon is the only current user,
+ * the packed form is chosen to match what Falcon needs to write into
+ * a register. This is checked at compile-time so do not change it. If
+ * your target chip needs things laid out differently you will need
+ * to unpack the arguments in your chip-specific mdio functions.
+ *
+ * Resulting layout, as built by the macros below: the dev ID occupies the
+ * low MDIO45_DEV_ID_WIDTH bits, the prt ID the next MDIO45_PRT_ID_WIDTH
+ * bits, and the 'is clause 45' flag the single bit above those.
+ */
+/* These are defined by the standard. */
+#define MDIO45_PRT_ID_WIDTH (5)
+#define MDIO45_DEV_ID_WIDTH (5)
+
+/* The prt ID is just packed in immediately to the left of the dev ID */
+#define MDIO45_PRT_DEV_WIDTH (MDIO45_PRT_ID_WIDTH + MDIO45_DEV_ID_WIDTH)
+
+/* NOTE(review): despite its name this mask is built from the combined
+ * prt + dev width, so it covers both fields rather than the prt ID alone. */
+#define MDIO45_PRT_ID_MASK ((1 << MDIO45_PRT_DEV_WIDTH) - 1)
+/* This is the prt + dev extended by 1 bit to hold the 'is clause 45' flag. */
+#define MDIO45_XPRT_ID_WIDTH (MDIO45_PRT_DEV_WIDTH + 1)
+#define MDIO45_XPRT_ID_MASK ((1 << MDIO45_XPRT_ID_WIDTH) - 1)
+#define MDIO45_XPRT_ID_IS10G (1 << (MDIO45_XPRT_ID_WIDTH - 1))
+
+
+/* Field descriptors (_LBN/_WIDTH pairs) consumed by the EFX_*_DWORD*
+ * macros in mdio_clause45_pack() and mdio_clause45_unpack() */
+#define MDIO45_PRT_ID_COMP_LBN MDIO45_DEV_ID_WIDTH
+#define MDIO45_PRT_ID_COMP_WIDTH MDIO45_PRT_ID_WIDTH
+#define MDIO45_DEV_ID_COMP_LBN 0
+#define MDIO45_DEV_ID_COMP_WIDTH MDIO45_DEV_ID_WIDTH
+
+/* Compose port and device into a phy_id.
+ * @prt is placed in the MDIO45_PRT_ID_COMP field and @dev in the
+ * MDIO45_DEV_ID_COMP field of a dword; MDIO45_XPRT_ID_IS10G is then ORed
+ * into the returned value to flag it as a clause 45 address. */
+static inline int mdio_clause45_pack(u8 prt, u8 dev)
+{
+ efx_dword_t phy_id;
+ EFX_POPULATE_DWORD_2(phy_id, MDIO45_PRT_ID_COMP, prt,
+ MDIO45_DEV_ID_COMP, dev);
+ return MDIO45_XPRT_ID_IS10G | EFX_DWORD_VAL(phy_id);
+}
+
+/* Split a packed phy_id back into its port and device parts.
+ * Only the MDIO45_PRT_ID_COMP and MDIO45_DEV_ID_COMP fields of @val are
+ * extracted (into *@prt and *@dev); the 'is clause 45' flag bit is not
+ * reported to the caller. */
+static inline void mdio_clause45_unpack(u32 val, u8 *prt, u8 *dev)
+{
+ efx_dword_t phy_id;
+ EFX_POPULATE_DWORD_1(phy_id, EFX_DWORD_0, val);
+ *prt = EFX_DWORD_FIELD(phy_id, MDIO45_PRT_ID_COMP);
+ *dev = EFX_DWORD_FIELD(phy_id, MDIO45_DEV_ID_COMP);
+}
+
+/* Read register @addr of device (MMD) @dev on port @prt.
+ * The port and device are packed into a single clause 45 phy_id and the
+ * access is delegated to the mdio_read hook of efx->mii, whose return
+ * value is passed straight back. */
+static inline int mdio_clause45_read(struct efx_nic *efx,
+				     u8 prt, u8 dev, u16 addr)
+{
+	int packed_id = mdio_clause45_pack(prt, dev);
+
+	return efx->mii.mdio_read(efx->net_dev, packed_id, addr);
+}
+
+/* Write @value to register @addr of device (MMD) @dev on port @prt.
+ * The port and device are packed into a single clause 45 phy_id and the
+ * access is delegated to the mdio_write hook of efx->mii. */
+static inline void mdio_clause45_write(struct efx_nic *efx,
+				       u8 prt, u8 dev, u16 addr, int value)
+{
+	int packed_id = mdio_clause45_pack(prt, dev);
+
+	efx->mii.mdio_write(efx->net_dev, packed_id, addr, value);
+}
+
+
+/* Read the 32-bit identifier of MMD @mmd on the PHY at efx->mii.phy_id.
+ * MDIO_MMDREG_IDLOW supplies the low 16 bits and MDIO_MMDREG_IDHI the high
+ * 16 bits of the result; the value can be decoded with MDIO_ID_REV(),
+ * MDIO_ID_MODEL() and MDIO_ID_OUI(). */
+static inline u32 mdio_clause45_read_id(struct efx_nic *efx, int mmd)
+{
+	int phy_id = efx->mii.phy_id;
+	u16 id_low = mdio_clause45_read(efx, phy_id, mmd, MDIO_MMDREG_IDLOW);
+	u16 id_hi = mdio_clause45_read(efx, phy_id, mmd, MDIO_MMDREG_IDHI);
+
+	/* Widen before shifting: a bare u16 is promoted to (signed) int,
+	 * and shifting a set bit 15 up into the sign bit is undefined
+	 * behaviour in C. */
+	return ((u32)id_hi << 16) | id_low;
+}
+
+/* Report whether the PHY XS lanes are aligned.
+ * Returns 1 if bit MDIO_PHYXS_LANE_ALIGNED_LBN is set in the
+ * MDIO_PHYXS_LANE_STATE register and 0 otherwise; when it is clear the
+ * raw register value is logged. */
+static inline int mdio_clause45_phyxgxs_lane_sync(struct efx_nic *efx)
+{
+	int reads_done = 0;
+	int status;
+	int aligned;
+
+	/* The register is read twice and only the second value is used.
+	 * NOTE(review): presumably the first read flushes latched state -
+	 * confirm against the PHY documentation. */
+	do {
+		status = mdio_clause45_read(efx, efx->mii.phy_id,
+					    MDIO_MMD_PHYXS,
+					    MDIO_PHYXS_LANE_STATE);
+	} while (++reads_done < 2);
+
+	aligned = (status & (1 << MDIO_PHYXS_LANE_ALIGNED_LBN)) ? 1 : 0;
+	if (aligned)
+		return aligned;
+
+	EFX_INFO(efx, "XGXS lane status: %x\n", status);
+	return aligned;
+}
+
+/* Functions below are only declared here; see the definitions for the
+ * authoritative behaviour. */
+
+/* Name of MMD number @mmd as a string (presumably for log messages) */
+extern const char *mdio_clause45_mmd_name(int mmd);
+
+/*
+ * Reset a specific MMD and wait for reset to clear.
+ * Return number of spins left (>0) on success, -%ETIMEDOUT on failure.
+ *
+ * This function will sleep
+ */
+extern int mdio_clause45_reset_mmd(struct efx_nic *efx, int mmd,
+ int spins, int spintime);
+
+/* As mdio_clause45_check_mmd but for multiple MMDs.
+ * NOTE(review): mdio_clause45_check_mmd is not declared in this header;
+ * the masks are presumably built from DEV_PRESENT_BIT()-style bits, one
+ * per MMD number - confirm against the definition. */
+int mdio_clause45_check_mmds(struct efx_nic *efx,
+ unsigned int mmd_mask, unsigned int fatal_mask);
+
+/* Check the link status of specified mmds in bit mask */
+extern int mdio_clause45_links_ok(struct efx_nic *efx,
+ unsigned int mmd_mask);
+
+/* Read (some of) the PHY settings over MDIO */
+extern void mdio_clause45_get_settings(struct efx_nic *efx,
+ struct ethtool_cmd *ecmd);
+
+/* Set (some of) the PHY settings over MDIO.
+ * Returns 0 when the request matches the current settings and
+ * -EOPNOTSUPP otherwise. */
+extern int mdio_clause45_set_settings(struct efx_nic *efx,
+ struct ethtool_cmd *ecmd);
+
+/* Wait for specified MMDs to exit reset within a timeout */
+extern int mdio_clause45_wait_reset_mmds(struct efx_nic *efx,
+ unsigned int mmd_mask);
+
+#endif /* EFX_MDIO_10G_H */
diff --git a/drivers/net/sfc/net_driver.h b/drivers/net/sfc/net_driver.h
new file mode 100644
index 00000000000..c505482c252
--- /dev/null
+++ b/drivers/net/sfc/net_driver.h
@@ -0,0 +1,883 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2005-2008 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+/* Common definitions for all Efx net driver code */
+
+#ifndef EFX_NET_DRIVER_H
+#define EFX_NET_DRIVER_H
+
+#include <linux/version.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+#include <linux/timer.h>
+#include <linux/mii.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/highmem.h>
+#include <linux/workqueue.h>
+#include <linux/inet_lro.h>
+
+#include "enum.h"
+#include "bitfield.h"
+#include "i2c-direct.h"
+
+#define EFX_MAX_LRO_DESCRIPTORS 8
+#define EFX_MAX_LRO_AGGR MAX_SKB_FRAGS
+
+/**************************************************************************
+ *
+ * Build definitions
+ *
+ **************************************************************************/
+/* The driver name may be overridden by defining EFX_DRIVER_NAME before
+ * this header is included; the version string is fixed. */
+#ifndef EFX_DRIVER_NAME
+#define EFX_DRIVER_NAME "sfc"
+#endif
+#define EFX_DRIVER_VERSION "2.2.0136"
+
+/* Paranoid checks: real BUG_ON()/WARN_ON() only when EFX_ENABLE_DEBUG is
+ * defined.  Otherwise they expand to an empty statement and the argument
+ * is not evaluated at all, so it must not have side effects. */
+#ifdef EFX_ENABLE_DEBUG
+#define EFX_BUG_ON_PARANOID(x) BUG_ON(x)
+#define EFX_WARN_ON_PARANOID(x) WARN_ON(x)
+#else
+#define EFX_BUG_ON_PARANOID(x) do {} while (0)
+#define EFX_WARN_ON_PARANOID(x) do {} while (0)
+#endif
+
+/* True once the net device's reg_state is NETREG_REGISTERED */
+#define NET_DEV_REGISTERED(efx) \
+ ((efx)->net_dev->reg_state == NETREG_REGISTERED)
+
+/* Include net device name in log messages if it has been registered.
+ * Use efx->name not efx->net_dev->name so that races with (un)registration
+ * are harmless.
+ */
+#define NET_DEV_NAME(efx) (NET_DEV_REGISTERED(efx) ? (efx)->name : "")
+
+/* Un-rate-limited logging.
+ * Every message is issued against the PCI device and prefixed with a
+ * severity tag and NET_DEV_NAME(efx).  @fmt must be a string literal
+ * because it is concatenated with the prefix. */
+#define EFX_ERR(efx, fmt, args...) \
+dev_err(&((efx)->pci_dev->dev), "ERR: %s " fmt, NET_DEV_NAME(efx), ##args)
+
+#define EFX_INFO(efx, fmt, args...) \
+dev_info(&((efx)->pci_dev->dev), "INFO: %s " fmt, NET_DEV_NAME(efx), ##args)
+
+/* EFX_LOG is emitted with dev_info() when EFX_ENABLE_DEBUG is defined and
+ * with dev_dbg() otherwise */
+#ifdef EFX_ENABLE_DEBUG
+#define EFX_LOG(efx, fmt, args...) \
+dev_info(&((efx)->pci_dev->dev), "DBG: %s " fmt, NET_DEV_NAME(efx), ##args)
+#else
+#define EFX_LOG(efx, fmt, args...) \
+dev_dbg(&((efx)->pci_dev->dev), "DBG: %s " fmt, NET_DEV_NAME(efx), ##args)
+#endif
+
+/* EFX_TRACE and EFX_REGDUMP always expand to an empty statement; their
+ * arguments are never evaluated */
+#define EFX_TRACE(efx, fmt, args...) do {} while (0)
+
+#define EFX_REGDUMP(efx, fmt, args...) do {} while (0)
+
+/* Rate-limited logging: the message is emitted only when net_ratelimit()
+ * allows it */
+#define EFX_ERR_RL(efx, fmt, args...) \
+do {if (net_ratelimit()) EFX_ERR(efx, fmt, ##args); } while (0)
+
+#define EFX_INFO_RL(efx, fmt, args...) \
+do {if (net_ratelimit()) EFX_INFO(efx, fmt, ##args); } while (0)
+
+#define EFX_LOG_RL(efx, fmt, args...) \
+do {if (net_ratelimit()) EFX_LOG(efx, fmt, ##args); } while (0)
+
+/* Kernel headers may redefine inline anyway.
+ * If they have not, force every 'inline' function seen after this point
+ * to carry the always_inline attribute. */
+#ifndef inline
+#define inline inline __attribute__ ((always_inline))
+#endif
+
+/**************************************************************************
+ *
+ * Efx data structures
+ *
+ **************************************************************************/
+
+/* Sizes of the channel[], tx_queue[] and rx_queue[] arrays embedded in
+ * struct efx_nic.  There is one RX queue slot per channel. */
+#define EFX_MAX_CHANNELS 32
+#define EFX_MAX_TX_QUEUES 1
+#define EFX_MAX_RX_QUEUES EFX_MAX_CHANNELS
+
+/**
+ * struct efx_special_buffer - An Efx special buffer
+ * @addr: CPU base address of the buffer
+ * @dma_addr: DMA base address of the buffer
+ * @len: Buffer length, in bytes
+ * @index: Buffer index within controller's buffer table
+ * @entries: Number of buffer table entries
+ *
+ * Special buffers are used for the event queues and the TX and RX
+ * descriptor queues for each channel. They are *not* used for the
+ * actual transmit and receive buffers.
+ *
+ * Note that for Falcon, TX and RX descriptor queues live in host memory.
+ * Allocation and freeing procedures must take this into account.
+ */
+struct efx_special_buffer {
+ void *addr;
+ dma_addr_t dma_addr;
+ unsigned int len;
+ int index;
+ int entries;
+};
+
+/**
+ * struct efx_tx_buffer - An Efx TX buffer
+ * @skb: The associated socket buffer.
+ * Set only on the final fragment of a packet; %NULL for all other
+ * fragments. When this fragment completes, then we can free this
+ * skb.
+ * @dma_addr: DMA address of the fragment.
+ * @len: Length of this fragment.
+ * This field is zero when the queue slot is empty.
+ * @continuation: True if this fragment is not the end of a packet.
+ * @unmap_single: True if pci_unmap_single should be used.
+ * @unmap_addr: DMA address to unmap
+ * @unmap_len: Length of this fragment to unmap
+ *
+ * One of these describes each slot of the software ring
+ * &efx_tx_queue.buffer.  @len and @unmap_len are unsigned short, so a
+ * single slot cannot describe more than 65535 bytes.
+ */
+struct efx_tx_buffer {
+ const struct sk_buff *skb;
+ dma_addr_t dma_addr;
+ unsigned short len;
+ unsigned char continuation;
+ unsigned char unmap_single;
+ dma_addr_t unmap_addr;
+ unsigned short unmap_len;
+};
+
+/**
+ * struct efx_tx_queue - An Efx TX queue
+ *
+ * This is a ring buffer of TX fragments.
+ * Since the TX completion path always executes on the same
+ * CPU and the xmit path can operate on different CPUs,
+ * performance is increased by ensuring that the completion
+ * path and the xmit path operate on different cache lines.
+ * This is particularly important if the xmit path is always
+ * executing on one CPU which is different from the completion
+ * path. There is also a cache line for members which are
+ * read but not written on the fast path.
+ *
+ * @efx: The associated Efx NIC
+ * @queue: DMA queue number
+ * @used: Queue is used by net driver
+ * @channel: The associated channel
+ * @nic: Second pointer of the same type as @efx.
+ * NOTE(review): not described in the original documentation; confirm
+ * whether it is still used or merely duplicates @efx.
+ * @buffer: The software buffer ring
+ * @txd: The hardware descriptor ring
+ * @read_count: Current read pointer.
+ * This is the number of buffers that have been removed from both rings.
+ * @stopped: Stopped flag.
+ * Set if this TX queue is currently stopping its port.
+ * @insert_count: Current insert pointer
+ * This is the number of buffers that have been added to the
+ * software ring.
+ * @write_count: Current write pointer
+ * This is the number of buffers that have been added to the
+ * hardware ring.
+ * @old_read_count: The value of read_count when last checked.
+ * This is here for performance reasons. The xmit path will
+ * only get the up-to-date value of read_count if this
+ * variable indicates that the queue is full. This is to
+ * avoid cache-line ping-pong between the xmit path and the
+ * completion path.
+ */
+struct efx_tx_queue {
+ /* Members which don't change on the fast path */
+ struct efx_nic *efx ____cacheline_aligned_in_smp;
+ int queue;
+ int used;
+ struct efx_channel *channel;
+ struct efx_nic *nic;
+ struct efx_tx_buffer *buffer;
+ struct efx_special_buffer txd;
+
+ /* Members used mainly on the completion path */
+ unsigned int read_count ____cacheline_aligned_in_smp;
+ int stopped;
+
+ /* Members used only on the xmit path */
+ unsigned int insert_count ____cacheline_aligned_in_smp;
+ unsigned int write_count;
+ unsigned int old_read_count;
+};
+
+/**
+ * struct efx_rx_buffer - An Efx RX data buffer
+ * @dma_addr: DMA base address of the buffer
+ * @skb: The associated socket buffer, if any.
+ * If both this and page are %NULL, the buffer slot is currently free.
+ * @page: The associated page buffer, if any.
+ * If both this and skb are %NULL, the buffer slot is currently free.
+ * @data: Pointer to ethernet header
+ * @len: Buffer length, in bytes.
+ * @unmap_addr: DMA address to unmap
+ *
+ * One of these describes each slot of the software ring
+ * &efx_rx_queue.buffer.
+ */
+struct efx_rx_buffer {
+ dma_addr_t dma_addr;
+ struct sk_buff *skb;
+ struct page *page;
+ char *data;
+ unsigned int len;
+ dma_addr_t unmap_addr;
+};
+
+/**
+ * struct efx_rx_queue - An Efx RX queue
+ * @efx: The associated Efx NIC
+ * @queue: DMA queue number
+ * @used: Queue is used by net driver
+ * @channel: The associated channel
+ * @buffer: The software buffer ring
+ * @rxd: The hardware descriptor ring
+ * @added_count: Number of buffers added to the receive queue.
+ * @notified_count: Number of buffers given to NIC (<= @added_count).
+ * @removed_count: Number of buffers removed from the receive queue.
+ * @add_lock: Receive queue descriptor add spin lock.
+ * This lock must be held in order to add buffers to the RX
+ * descriptor ring (rxd and buffer) and to update added_count (but
+ * not removed_count).
+ * @max_fill: RX descriptor maximum fill level (<= ring size)
+ * @fast_fill_trigger: RX descriptor fill level that will trigger a fast fill
+ * (<= @max_fill)
+ * @fast_fill_limit: The level to which a fast fill will fill
+ * (@fast_fill_trigger <= @fast_fill_limit <= @max_fill)
+ * @min_fill: RX descriptor minimum non-zero fill level.
+ * This records the minimum fill level observed when a ring
+ * refill was triggered.
+ * @min_overfill: RX descriptor minimum overflow fill level.
+ * This records the minimum fill level at which RX queue
+ * overflow was observed. It should never be set.
+ * @alloc_page_count: RX allocation strategy counter.
+ * @alloc_skb_count: RX allocation strategy counter.
+ * @work: Descriptor push work thread
+ * @slow_fill_count: NOTE(review): not described in the original
+ * documentation; presumably a count of slow-path refills - confirm
+ * against rx.c.
+ * @buf_page: Page for next RX buffer.
+ * We can use a single page for multiple RX buffers. This tracks
+ * the remaining space in the allocation.
+ * @buf_dma_addr: Page's DMA address.
+ * @buf_data: Page's host address.
+ */
+struct efx_rx_queue {
+ struct efx_nic *efx;
+ int queue;
+ int used;
+ struct efx_channel *channel;
+ struct efx_rx_buffer *buffer;
+ struct efx_special_buffer rxd;
+
+ int added_count;
+ int notified_count;
+ int removed_count;
+ spinlock_t add_lock;
+ unsigned int max_fill;
+ unsigned int fast_fill_trigger;
+ unsigned int fast_fill_limit;
+ unsigned int min_fill;
+ unsigned int min_overfill;
+ unsigned int alloc_page_count;
+ unsigned int alloc_skb_count;
+ struct delayed_work work;
+ unsigned int slow_fill_count;
+
+ struct page *buf_page;
+ dma_addr_t buf_dma_addr;
+ char *buf_data;
+};
+
+/**
+ * struct efx_buffer - An Efx general-purpose buffer
+ * @addr: host base address of the buffer
+ * @dma_addr: DMA base address of the buffer
+ * @len: Buffer length, in bytes
+ *
+ * Falcon uses these buffers for its interrupt status registers and
+ * MAC stats dumps (see &efx_nic.irq_status and &efx_nic.stats_buffer).
+ */
+struct efx_buffer {
+ void *addr;
+ dma_addr_t dma_addr;
+ unsigned int len;
+};
+
+
+/* Flags for channel->used_flags.
+ * RX and TX are separate bits; EFX_USED_BY_RX_TX is both of them ORed
+ * together.  A channel whose used_flags is zero is skipped by the
+ * efx_for_each_channel() iterators. */
+#define EFX_USED_BY_RX 1
+#define EFX_USED_BY_TX 2
+#define EFX_USED_BY_RX_TX (EFX_USED_BY_RX | EFX_USED_BY_TX)
+
+/* RX buffer allocation strategy.
+ * NOTE(review): presumably SKB and PAGE select skb-based or page-based
+ * RX buffers and AUTO lets the driver choose - confirm against rx.c. */
+enum efx_rx_alloc_method {
+ RX_ALLOC_METHOD_AUTO = 0,
+ RX_ALLOC_METHOD_SKB = 1,
+ RX_ALLOC_METHOD_PAGE = 2,
+};
+
+/**
+ * struct efx_channel - An Efx channel
+ *
+ * A channel comprises an event queue, at least one TX queue, at least
+ * one RX queue, and an associated tasklet for processing the event
+ * queue.
+ *
+ * @efx: Associated Efx NIC
+ * @evqnum: Event queue number
+ * @channel: Channel instance number
+ * @used_flags: Channel is used by net driver
+ * @enabled: Channel enabled indicator
+ * @irq: IRQ number (MSI and MSI-X only)
+ * @has_interrupt: Channel has an interrupt
+ * @irq_moderation: IRQ moderation value (in us)
+ * @napi_dev: Net device used with NAPI
+ * @napi_str: NAPI control structure
+ * @reset_work: Scheduled reset work thread
+ * @work_pending: Is work pending via NAPI?
+ * @eventq: Event queue buffer
+ * @eventq_read_ptr: Event queue read pointer
+ * @last_eventq_read_ptr: Last event queue read pointer value.
+ * @eventq_magic: Event queue magic value for driver-generated test events
+ * @lro_mgr: LRO state
+ * @rx_alloc_level: Watermark based heuristic counter for pushing descriptors
+ * and diagnostic counters
+ * @rx_alloc_push_pages: RX allocation method currently in use for pushing
+ * descriptors
+ * @rx_alloc_pop_pages: RX allocation method currently in use for popping
+ * descriptors
+ * @n_rx_tobe_disc: Count of RX_TOBE_DISC errors
+ * @n_rx_ip_frag_err: Count of RX IP fragment errors
+ * @n_rx_ip_hdr_chksum_err: Count of RX IP header checksum errors
+ * @n_rx_tcp_udp_chksum_err: Count of RX TCP and UDP checksum errors
+ * @n_rx_frm_trunc: Count of RX_FRM_TRUNC errors
+ * @n_rx_overlength: Count of RX_OVERLENGTH errors
+ * @n_skbuff_leaks: Count of skbuffs leaked due to RX overrun
+ * @rx_pkt: RX buffer held for pipelined processing, so that memory
+ * accesses can be optimised with prefetches
+ * @rx_pkt_csummed: Companion state for @rx_pkt.
+ * NOTE(review): presumably records whether @rx_pkt passed checksum
+ * validation - confirm against rx.c.
+ */
+struct efx_channel {
+ struct efx_nic *efx;
+ int evqnum;
+ int channel;
+ int used_flags;
+ int enabled;
+ int irq;
+ unsigned int has_interrupt;
+ unsigned int irq_moderation;
+ struct net_device *napi_dev;
+ struct napi_struct napi_str;
+ struct work_struct reset_work;
+ int work_pending;
+ struct efx_special_buffer eventq;
+ unsigned int eventq_read_ptr;
+ unsigned int last_eventq_read_ptr;
+ unsigned int eventq_magic;
+
+ struct net_lro_mgr lro_mgr;
+ int rx_alloc_level;
+ int rx_alloc_push_pages;
+ int rx_alloc_pop_pages;
+
+ unsigned n_rx_tobe_disc;
+ unsigned n_rx_ip_frag_err;
+ unsigned n_rx_ip_hdr_chksum_err;
+ unsigned n_rx_tcp_udp_chksum_err;
+ unsigned n_rx_frm_trunc;
+ unsigned n_rx_overlength;
+ unsigned n_skbuff_leaks;
+
+ /* Used to pipeline received packets in order to optimise memory
+ * access with prefetches.
+ */
+ struct efx_rx_buffer *rx_pkt;
+ int rx_pkt_csummed;
+
+};
+
+/**
+ * struct efx_blinker - S/W LED blinking context
+ * @led_num: LED ID (board-specific meaning)
+ * @state: Current state - on or off
+ * @resubmit: Timer resubmission flag
+ * @timer: Control timer for blinking
+ *
+ * Embedded in &struct efx_board as its @blinker member.
+ */
+struct efx_blinker {
+ int led_num;
+ int state;
+ int resubmit;
+ struct timer_list timer;
+};
+
+
+/**
+ * struct efx_board - board information
+ * @type: Board model type
+ * @major: Major rev. ('A', 'B' ...)
+ * @minor: Minor rev. (0, 1, ...)
+ * @init: Initialisation function
+ * @init_leds: Sets up board LEDs
+ * @set_fault_led: Turns the fault LED on or off
+ * @blink: Starts/stops blinking
+ * @blinker: used to blink LEDs in software
+ *
+ * Embedded in &struct efx_nic as its @board_info member.  All callbacks
+ * take the owning NIC as their first argument.
+ */
+struct efx_board {
+ int type;
+ int major;
+ int minor;
+ int (*init) (struct efx_nic *nic);
+ /* As the LEDs are typically attached to the PHY, LEDs
+ * have a separate init callback that happens later than
+ * board init. */
+ int (*init_leds)(struct efx_nic *efx);
+ void (*set_fault_led) (struct efx_nic *efx, int state);
+ void (*blink) (struct efx_nic *efx, int start);
+ struct efx_blinker blinker;
+};
+
+/* Interrupt delivery mode, stored in &efx_nic.interrupt_mode */
+enum efx_int_mode {
+ /* EFX_INT_MODE_USE_MSI() below relies on the MSI-X and MSI modes
+ * having the two lowest values; keep that macro correct if these
+ * values are ever altered. */
+ EFX_INT_MODE_MSIX = 0,
+ EFX_INT_MODE_MSI = 1,
+ EFX_INT_MODE_LEGACY = 2,
+ EFX_INT_MODE_MAX /* Insert any new items before this */
+};
+/* True when the NIC @x uses MSI-X or MSI rather than a legacy interrupt */
+#define EFX_INT_MODE_USE_MSI(x) (((x)->interrupt_mode) <= EFX_INT_MODE_MSI)
+
+/* PHY model, stored in &efx_nic.phy_type.
+ * The values are assigned explicitly; 5 is not used by any entry. */
+enum phy_type {
+ PHY_TYPE_NONE = 0,
+ PHY_TYPE_CX4_RTMR = 1,
+ PHY_TYPE_1G_ALASKA = 2,
+ PHY_TYPE_10XPRESS = 3,
+ PHY_TYPE_XFP = 4,
+ PHY_TYPE_PM8358 = 6,
+ PHY_TYPE_MAX /* Insert any new items before this */
+};
+
+#define PHY_ADDR_INVALID 0xff
+
+/* Device state, stored in &efx_nic.state (which is documented as being
+ * serialised by the rtnl_lock) */
+enum nic_state {
+ STATE_INIT = 0,
+ STATE_RUNNING = 1,
+ STATE_FINI = 2,
+ STATE_RESETTING = 3, /* rtnl_lock always held */
+ STATE_DISABLED = 4,
+ STATE_MAX,
+};
+
+/*
+ * Alignment of page-allocated RX buffers
+ *
+ * Controls the number of bytes inserted at the start of an RX buffer.
+ * This is the equivalent of NET_IP_ALIGN [which controls the alignment
+ * of the skb->head for hardware DMA].
+ */
+#if defined(__i386__) || defined(__x86_64__)
+#define EFX_PAGE_IP_ALIGN 0
+#else
+#define EFX_PAGE_IP_ALIGN NET_IP_ALIGN
+#endif
+
+/*
+ * Alignment of the skb->head which wraps a page-allocated RX buffer
+ *
+ * The skb allocated to wrap an rx_buffer can have this alignment. Since
+ * the data is memcpy'd from the rx_buf, it does not need to be equal to
+ * EFX_PAGE_IP_ALIGN.
+ */
+#define EFX_PAGE_SKB_ALIGN 2
+
+/* Forward declaration */
+struct efx_nic;
+
+/* Pseudo bit-mask flow control field.
+ * Each enumerator is a distinct bit, so values can be ORed together;
+ * stored in &efx_nic.flow_control. */
+enum efx_fc_type {
+ EFX_FC_RX = 1,
+ EFX_FC_TX = 2,
+ EFX_FC_AUTO = 4,
+};
+
+/**
+ * struct efx_phy_operations - Efx PHY operations table
+ * @init: Initialise PHY
+ * @fini: Shut down PHY
+ * @reconfigure: Reconfigure PHY (e.g. for new link parameters)
+ * @clear_interrupt: Clear down interrupt
+ * @check_hw: Check hardware
+ * @reset_xaui: Reset XAUI side of PHY (for software sequenced reset)
+ * @mmds: MMD presence mask
+ *
+ * Referenced from &struct efx_nic through its @phy_op pointer.
+ */
+struct efx_phy_operations {
+ int (*init) (struct efx_nic *efx);
+ void (*fini) (struct efx_nic *efx);
+ void (*reconfigure) (struct efx_nic *efx);
+ void (*clear_interrupt) (struct efx_nic *efx);
+ int (*check_hw) (struct efx_nic *efx);
+ void (*reset_xaui) (struct efx_nic *efx);
+ int mmds;
+};
+
+/*
+ * Efx extended statistics
+ *
+ * Not all statistics are provided by all supported MACs. The purpose
+ * of this structure is to contain the raw statistics provided by each
+ * MAC.
+ *
+ * The byte counters are u64; every other counter is unsigned long, so
+ * its width follows the kernel's word size.
+ */
+struct efx_mac_stats {
+ u64 tx_bytes;
+ u64 tx_good_bytes;
+ u64 tx_bad_bytes;
+ unsigned long tx_packets;
+ unsigned long tx_bad;
+ unsigned long tx_pause;
+ unsigned long tx_control;
+ unsigned long tx_unicast;
+ unsigned long tx_multicast;
+ unsigned long tx_broadcast;
+ unsigned long tx_lt64;
+ unsigned long tx_64;
+ unsigned long tx_65_to_127;
+ unsigned long tx_128_to_255;
+ unsigned long tx_256_to_511;
+ unsigned long tx_512_to_1023;
+ unsigned long tx_1024_to_15xx;
+ unsigned long tx_15xx_to_jumbo;
+ unsigned long tx_gtjumbo;
+ unsigned long tx_collision;
+ unsigned long tx_single_collision;
+ unsigned long tx_multiple_collision;
+ unsigned long tx_excessive_collision;
+ unsigned long tx_deferred;
+ unsigned long tx_late_collision;
+ unsigned long tx_excessive_deferred;
+ unsigned long tx_non_tcpudp;
+ unsigned long tx_mac_src_error;
+ unsigned long tx_ip_src_error;
+ u64 rx_bytes;
+ u64 rx_good_bytes;
+ u64 rx_bad_bytes;
+ unsigned long rx_packets;
+ unsigned long rx_good;
+ unsigned long rx_bad;
+ unsigned long rx_pause;
+ unsigned long rx_control;
+ unsigned long rx_unicast;
+ unsigned long rx_multicast;
+ unsigned long rx_broadcast;
+ unsigned long rx_lt64;
+ unsigned long rx_64;
+ unsigned long rx_65_to_127;
+ unsigned long rx_128_to_255;
+ unsigned long rx_256_to_511;
+ unsigned long rx_512_to_1023;
+ unsigned long rx_1024_to_15xx;
+ unsigned long rx_15xx_to_jumbo;
+ unsigned long rx_gtjumbo;
+ unsigned long rx_bad_lt64;
+ unsigned long rx_bad_64_to_15xx;
+ unsigned long rx_bad_15xx_to_jumbo;
+ unsigned long rx_bad_gtjumbo;
+ unsigned long rx_overflow;
+ unsigned long rx_missed;
+ unsigned long rx_false_carrier;
+ unsigned long rx_symbol_error;
+ unsigned long rx_align_error;
+ unsigned long rx_length_error;
+ unsigned long rx_internal_error;
+ unsigned long rx_good_lt64;
+};
+
+/* Number of bits used in a multicast filter hash address */
+#define EFX_MCAST_HASH_BITS 8
+
+/* Number of (single-bit) entries in a multicast filter hash */
+#define EFX_MCAST_HASH_ENTRIES (1 << EFX_MCAST_HASH_BITS)
+
+/* An Efx multicast filter hash.
+ * The same storage, one bit per hash entry, viewed either as an array
+ * of bytes or as an array of efx_oword_t. */
+union efx_multicast_hash {
+ u8 byte[EFX_MCAST_HASH_ENTRIES / 8];
+ efx_oword_t oword[EFX_MCAST_HASH_ENTRIES / sizeof(efx_oword_t) / 8];
+};
+
+/**
+ * struct efx_nic - an Efx NIC
+ * @name: Device name (net device name or bus id before net device registered)
+ * @pci_dev: The PCI device
+ * @type: Controller type attributes
+ * @legacy_irq: IRQ number
+ * @workqueue: Workqueue for resets, port reconfigures and the HW monitor
+ * @reset_work: Scheduled reset workitem
+ * @monitor_work: Hardware monitor workitem
+ * @membase_phys: Memory BAR value as physical address
+ * @membase: Memory BAR value
+ * @biu_lock: BIU (bus interface unit) lock
+ * @interrupt_mode: Interrupt mode
+ * @i2c: I2C interface
+ * @board_info: Board-level information
+ * @state: Device state flag. Serialised by the rtnl_lock.
+ * @reset_pending: Pending reset method (normally RESET_TYPE_NONE)
+ * @tx_queue: TX DMA queues
+ * @rx_queue: RX DMA queues
+ * @channel: Channels
+ * @rss_queues: Number of RSS queues
+ * @rx_buffer_len: RX buffer length
+ * @rx_buffer_order: Order (log2) of number of pages for each RX buffer
+ * @irq_status: Interrupt status buffer
+ * @last_irq_cpu: Last CPU to handle interrupt.
+ * This register is written with the SMP processor ID whenever an
+ * interrupt is handled. It is used by falcon_test_interrupt()
+ * to verify that an interrupt has occurred.
+ * @n_rx_nodesc_drop_cnt: RX no descriptor drop count
+ * @nic_data: Hardware dependent state
+ * @mac_lock: MAC access lock. Protects @port_enabled, efx_monitor() and
+ * efx_reconfigure_port()
+ * @port_enabled: Port enabled indicator.
+ * Serialises efx_stop_all(), efx_start_all() and efx_monitor() and
+ * efx_reconfigure_work with kernel interfaces. Safe to read under any
+ * one of the rtnl_lock, mac_lock, or netif_tx_lock, but all three must
+ * be held to modify it.
+ * @port_initialized: Port initialized?
+ * @net_dev: Operating system network device. Consider holding the rtnl lock
+ * @rx_checksum_enabled: RX checksumming enabled
+ * @netif_stop_count: Port stop count
+ * @netif_stop_lock: Port stop lock
+ * @mac_stats: MAC statistics. These include all statistics the MACs
+ * can provide. Generic code converts these into a standard
+ * &struct net_device_stats.
+ * @stats_buffer: DMA buffer for statistics
+ * @stats_lock: Statistics update lock
+ * @mac_address: Permanent MAC address
+ * @phy_type: PHY type
+ * @phy_lock: PHY access lock
+ * @phy_op: PHY interface
+ * @phy_data: PHY private data (including PHY-specific stats)
+ * @mii: PHY interface
+ * @link_up: Link status
+ * @link_options: Link options (MII/GMII format)
+ * @n_link_state_changes: Number of times the link has changed state
+ * @promiscuous: Promiscuous flag. Protected by netif_tx_lock.
+ * @multicast_hash: Multicast hash table
+ * @flow_control: Flow control flags - separate RX/TX so can't use link_options
+ * @reconfigure_work: work item for dealing with PHY events
+ * @rx_reset: NOTE(review): not described in the original documentation;
+ * an atomic_t whose purpose is not visible in this header - confirm
+ * against its users.
+ *
+ * The @priv field of the corresponding &struct net_device points to
+ * this.
+ */
+struct efx_nic {
+ char name[IFNAMSIZ];
+ struct pci_dev *pci_dev;
+ const struct efx_nic_type *type;
+ int legacy_irq;
+ struct workqueue_struct *workqueue;
+ struct work_struct reset_work;
+ struct delayed_work monitor_work;
+ unsigned long membase_phys;
+ void __iomem *membase;
+ spinlock_t biu_lock;
+ enum efx_int_mode interrupt_mode;
+
+ struct efx_i2c_interface i2c;
+ struct efx_board board_info;
+
+ enum nic_state state;
+ enum reset_type reset_pending;
+
+ struct efx_tx_queue tx_queue[EFX_MAX_TX_QUEUES];
+ struct efx_rx_queue rx_queue[EFX_MAX_RX_QUEUES];
+ struct efx_channel channel[EFX_MAX_CHANNELS];
+
+ int rss_queues;
+ unsigned int rx_buffer_len;
+ unsigned int rx_buffer_order;
+
+ struct efx_buffer irq_status;
+ volatile signed int last_irq_cpu;
+
+ unsigned n_rx_nodesc_drop_cnt;
+
+ void *nic_data;
+
+ struct mutex mac_lock;
+ int port_enabled;
+
+ int port_initialized;
+ struct net_device *net_dev;
+ int rx_checksum_enabled;
+
+ atomic_t netif_stop_count;
+ spinlock_t netif_stop_lock;
+
+ struct efx_mac_stats mac_stats;
+ struct efx_buffer stats_buffer;
+ spinlock_t stats_lock;
+
+ unsigned char mac_address[ETH_ALEN];
+
+ enum phy_type phy_type;
+ spinlock_t phy_lock;
+ struct efx_phy_operations *phy_op;
+ void *phy_data;
+ struct mii_if_info mii;
+
+ int link_up;
+ unsigned int link_options;
+ unsigned int n_link_state_changes;
+
+ int promiscuous;
+ union efx_multicast_hash multicast_hash;
+ enum efx_fc_type flow_control;
+ struct work_struct reconfigure_work;
+
+ atomic_t rx_reset;
+};
+
+/**
+ * struct efx_nic_type - Efx device type definition
+ * @mem_bar: Memory BAR number
+ * @mem_map_size: Memory BAR mapped size
+ * @txd_ptr_tbl_base: TX descriptor ring base address
+ * @rxd_ptr_tbl_base: RX descriptor ring base address
+ * @buf_tbl_base: Buffer table base address
+ * @evq_ptr_tbl_base: Event queue pointer table base address
+ * @evq_rptr_tbl_base: Event queue read-pointer table base address
+ * @txd_ring_mask: TX descriptor ring size - 1 (must be a power of two - 1)
+ * @rxd_ring_mask: RX descriptor ring size - 1 (must be a power of two - 1)
+ * @evq_size: Event queue size (must be a power of two)
+ * @max_dma_mask: Maximum possible DMA mask
+ * @tx_dma_mask: TX DMA mask
+ * @bug5391_mask: Address mask for bug 5391 workaround
+ * @rx_xoff_thresh: RX FIFO XOFF watermark (bytes)
+ * @rx_xon_thresh: RX FIFO XON watermark (bytes)
+ * @rx_buffer_padding: Padding added to each RX buffer
+ * @max_interrupt_mode: Highest capability interrupt mode supported
+ * from &enum efx_int_mode.
+ * @phys_addr_channels: Number of channels with physically addressed
+ * descriptors
+ */
+struct efx_nic_type {
+ unsigned int mem_bar;
+ unsigned int mem_map_size;
+ unsigned int txd_ptr_tbl_base;
+ unsigned int rxd_ptr_tbl_base;
+ unsigned int buf_tbl_base;
+ unsigned int evq_ptr_tbl_base;
+ unsigned int evq_rptr_tbl_base;
+
+ unsigned int txd_ring_mask;
+ unsigned int rxd_ring_mask;
+ unsigned int evq_size;
+ dma_addr_t max_dma_mask;
+ unsigned int tx_dma_mask;
+ unsigned bug5391_mask;
+
+ int rx_xoff_thresh;
+ int rx_xon_thresh;
+ unsigned int rx_buffer_padding;
+ unsigned int max_interrupt_mode;
+ unsigned int phys_addr_channels;
+};
+
+/**************************************************************************
+ *
+ * Prototypes and inline functions
+ *
+ *************************************************************************/
+
+/* Iterate over all used channels.
+ *
+ * Expands to a for/if/else header: the statement that follows the macro
+ * is the loop body and runs once for every channel whose used_flags is
+ * non-zero.  Both arguments are parenthesised so that expressions such
+ * as &nic can be passed; they are evaluated several times and must
+ * therefore be free of side effects.
+ */
+#define efx_for_each_channel(_channel, _efx) \
+	for ((_channel) = &(_efx)->channel[0]; \
+	     (_channel) < &(_efx)->channel[EFX_MAX_CHANNELS]; \
+	     (_channel)++) \
+		if (!(_channel)->used_flags) \
+			continue; \
+		else
+
+/* Iterate over all used channels with interrupts.
+ *
+ * The statement following the macro runs for every channel that has
+ * both used_flags and has_interrupt non-zero.  Arguments are
+ * parenthesised so arbitrary pointer expressions may be passed; they
+ * are evaluated several times and must be free of side effects.
+ */
+#define efx_for_each_channel_with_interrupt(_channel, _efx) \
+	for ((_channel) = &(_efx)->channel[0]; \
+	     (_channel) < &(_efx)->channel[EFX_MAX_CHANNELS]; \
+	     (_channel)++) \
+		if (!((_channel)->used_flags && (_channel)->has_interrupt)) \
+			continue; \
+		else
+
+/* Iterate over all used TX queues.
+ *
+ * The statement following the macro runs for every TX queue whose
+ * used field is non-zero.  Arguments are parenthesised so arbitrary
+ * pointer expressions may be passed; they are evaluated several times
+ * and must be free of side effects.
+ */
+#define efx_for_each_tx_queue(_tx_queue, _efx) \
+	for ((_tx_queue) = &(_efx)->tx_queue[0]; \
+	     (_tx_queue) < &(_efx)->tx_queue[EFX_MAX_TX_QUEUES]; \
+	     (_tx_queue)++) \
+		if (!(_tx_queue)->used) \
+			continue; \
+		else
+
+/* Iterate over all TX queues belonging to a channel.
+ *
+ * Walks every TX queue of the NIC that @_channel points back to (via
+ * its efx member) and runs the following statement for each queue that
+ * is used and whose channel pointer equals @_channel.  Arguments are
+ * parenthesised so expressions such as &chans[i] may be passed; they
+ * are evaluated several times and must be free of side effects.
+ */
+#define efx_for_each_channel_tx_queue(_tx_queue, _channel) \
+	for ((_tx_queue) = &(_channel)->efx->tx_queue[0]; \
+	     (_tx_queue) < &(_channel)->efx->tx_queue[EFX_MAX_TX_QUEUES]; \
+	     (_tx_queue)++) \
+		if ((!(_tx_queue)->used) || \
+		    ((_tx_queue)->channel != (_channel))) \
+			continue; \
+		else
+
+/* Iterate over all used RX queues.
+ *
+ * The statement following the macro runs for every RX queue whose
+ * used field is non-zero.  Arguments are parenthesised so arbitrary
+ * pointer expressions may be passed; they are evaluated several times
+ * and must be free of side effects.
+ */
+#define efx_for_each_rx_queue(_rx_queue, _efx) \
+	for ((_rx_queue) = &(_efx)->rx_queue[0]; \
+	     (_rx_queue) < &(_efx)->rx_queue[EFX_MAX_RX_QUEUES]; \
+	     (_rx_queue)++) \
+		if (!(_rx_queue)->used) \
+			continue; \
+		else
+
+/* Iterate over all RX queues belonging to a channel.
+ *
+ * Walks every RX queue of the NIC that @_channel points back to (via
+ * its efx member) and runs the following statement for each queue that
+ * is used and whose channel pointer equals @_channel.  Arguments are
+ * parenthesised so expressions such as &chans[i] may be passed; they
+ * are evaluated several times and must be free of side effects.
+ */
+#define efx_for_each_channel_rx_queue(_rx_queue, _channel) \
+	for ((_rx_queue) = &(_channel)->efx->rx_queue[0]; \
+	     (_rx_queue) < &(_channel)->efx->rx_queue[EFX_MAX_RX_QUEUES]; \
+	     (_rx_queue)++) \
+		if ((!(_rx_queue)->used) || \
+		    ((_rx_queue)->channel != (_channel))) \
+			continue; \
+		else
+
+/* Look up the software state for one slot of an RX descriptor queue.
+ * @index subscripts @rx_queue->buffer directly; no masking or bounds
+ * check is applied here.
+ */
+static inline struct efx_rx_buffer *efx_rx_buffer(struct efx_rx_queue *rx_queue,
+						  unsigned int index)
+{
+	struct efx_rx_buffer *ring = rx_queue->buffer;
+
+	return ring + index;
+}
+
+/* Set bit @nr of a little-endian bitfield: bit 0 is the least
+ * significant bit of addr[0], bit 8 the least significant bit of
+ * addr[1], and so on.
+ */
+static inline void set_bit_le(int nr, unsigned char *addr)
+{
+	unsigned char *byte = addr + nr / 8;
+
+	*byte |= (1 << (nr % 8));
+}
+
+/* Clear bit @nr of a little-endian bitfield: bit 0 is the least
+ * significant bit of addr[0], bit 8 the least significant bit of
+ * addr[1], and so on.
+ */
+static inline void clear_bit_le(int nr, unsigned char *addr)
+{
+	unsigned char *byte = addr + nr / 8;
+
+	*byte &= ~(1 << (nr % 8));
+}
+
+
+/**
+ * EFX_MAX_FRAME_LEN - maximum frame length programmed into the MAC
+ * @mtu: MTU in bytes
+ *
+ * Adds the fixed per-frame overhead (Ethernet header, one VLAN tag and
+ * the 4-byte FCS) to @mtu and rounds the sum up to a multiple of 8.
+ * The rounding is needed because the 10G MAC used in Falcon requires
+ * the frame length to be 8-byte aligned.
+ */
+#define EFX_MAX_FRAME_LEN(mtu) \
+	(((mtu) + ETH_HLEN + VLAN_HLEN + 4 /* FCS */ + 7) & ~7)
+
+
+#endif /* EFX_NET_DRIVER_H */
diff --git a/drivers/net/sfc/phy.h b/drivers/net/sfc/phy.h
new file mode 100644
index 00000000000..9d02c84e6b2
--- /dev/null
+++ b/drivers/net/sfc/phy.h
@@ -0,0 +1,48 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2007 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_PHY_H
+#define EFX_PHY_H
+
+/****************************************************************************
+ * 10Xpress (SFX7101) PHY
+ */
+extern struct efx_phy_operations falcon_tenxpress_phy_ops;
+
+/* State values accepted by tenxpress_set_state().
+ * NOTE(review): the names suggest "PHY off", "over-temperature" and
+ * "normal operation"; the users live in other files, so confirm the
+ * exact meaning there.
+ */
+enum tenxpress_state {
+ TENXPRESS_STATUS_OFF = 0,
+ TENXPRESS_STATUS_OTEMP = 1,
+ TENXPRESS_STATUS_NORMAL = 2,
+};
+
+extern void tenxpress_set_state(struct efx_nic *efx,
+ enum tenxpress_state state);
+extern void tenxpress_phy_blink(struct efx_nic *efx, int blink);
+extern void tenxpress_crc_err(struct efx_nic *efx);
+
+/****************************************************************************
+ * Exported functions from the driver for XFP optical PHYs
+ */
+extern struct efx_phy_operations falcon_xfp_phy_ops;
+
+/* The QUAKE XFP PHY provides various H/W control states for LEDs */
+#define QUAKE_LED_LINK_INVAL (0)
+#define QUAKE_LED_LINK_STAT (1)
+#define QUAKE_LED_LINK_ACT (2)
+#define QUAKE_LED_LINK_ACTSTAT (3)
+#define QUAKE_LED_OFF (4)
+#define QUAKE_LED_ON (5)
+#define QUAKE_LED_LINK_INPUT (6) /* Pin is an input. */
+/* What link the LED tracks.
+ * NOTE(review): RXLINK is a separate bit (8) above the 0-6 mode values,
+ * so it is presumably OR-ed with one of the modes above - confirm
+ * against the callers of xfp_set_led().
+ */
+#define QUAKE_LED_TXLINK (0)
+#define QUAKE_LED_RXLINK (8)
+
+extern void xfp_set_led(struct efx_nic *p, int led, int state);
+
+#endif
diff --git a/drivers/net/sfc/rx.c b/drivers/net/sfc/rx.c
new file mode 100644
index 00000000000..551299b462a
--- /dev/null
+++ b/drivers/net/sfc/rx.c
@@ -0,0 +1,875 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2005-2008 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include "net_driver.h"
+#include "rx.h"
+#include "efx.h"
+#include "falcon.h"
+#include "workarounds.h"
+
+/* Number of RX descriptors pushed at once. */
+#define EFX_RX_BATCH 8
+
+/* Size of buffer allocated for skb header area. */
+#define EFX_SKB_HEADERS 64u
+
+/*
+ * rx_alloc_method - RX buffer allocation method
+ *
+ * This driver supports two methods for allocating and using RX buffers:
+ * each RX buffer may be backed by an skb or by an order-n page.
+ *
+ * When LRO is in use then the second method has a lower overhead,
+ * since we don't have to allocate then free skbs on reassembled frames.
+ *
+ * Values:
+ * - RX_ALLOC_METHOD_AUTO = 0
+ * - RX_ALLOC_METHOD_SKB = 1
+ * - RX_ALLOC_METHOD_PAGE = 2
+ *
+ * The heuristic for %RX_ALLOC_METHOD_AUTO is a simple hysteresis count
+ * controlled by the parameters below.
+ *
+ * - Pushing and popping descriptors are separated by the rx_queue
+ * size, so the watermarks should be ~rxd_size.
+ * - The performance win from using page-based allocation for LRO is less
+ * than the performance hit of using page-based allocation for non-LRO
+ * traffic, so the watermarks should reflect this.
+ *
+ * Per channel we maintain a single variable, updated by each channel:
+ *
+ * rx_alloc_level += (lro_performed ? RX_ALLOC_FACTOR_LRO :
+ * RX_ALLOC_FACTOR_SKB)
+ * Per NAPI poll interval, we constrain rx_alloc_level to 0..MAX (which
+ * limits the hysteresis), and update the allocation strategy:
+ *
+ * rx_alloc_method = (rx_alloc_level > RX_ALLOC_LEVEL_LRO ?
+ * RX_ALLOC_METHOD_PAGE : RX_ALLOC_METHOD_SKB)
+ */
+static int rx_alloc_method = RX_ALLOC_METHOD_PAGE;
+
+#define RX_ALLOC_LEVEL_LRO 0x2000
+#define RX_ALLOC_LEVEL_MAX 0x3000
+#define RX_ALLOC_FACTOR_LRO 1
+#define RX_ALLOC_FACTOR_SKB (-2)
+
+/* This is the percentage fill level below which new RX descriptors
+ * will be added to the RX descriptor ring.
+ */
+static unsigned int rx_refill_threshold = 90;
+
+/* This is the percentage fill level to which an RX queue will be refilled
+ * when the "RX refill threshold" is reached.
+ */
+static unsigned int rx_refill_limit = 95;
+
+/*
+ * RX maximum head room required.
+ *
+ * This must be at least 1 to prevent overflow and at least 2 to allow
+ * pipelined receives.
+ */
+#define EFX_RXD_HEAD_ROOM 2
+
+/* Macros for zero-order pages (potentially) containing multiple RX buffers */
+/* Byte offset of the address @_data within its PAGE_SIZE-sized page */
+#define RX_DATA_OFFSET(_data) \
+ (((unsigned long) (_data)) & (PAGE_SIZE-1))
+/* Byte offset of an RX buffer's data pointer within its page */
+#define RX_BUF_OFFSET(_rx_buf) \
+ RX_DATA_OFFSET((_rx_buf)->data)
+
+/* Size in bytes of one RX page allocation: PAGE_SIZE << rx_buffer_order */
+#define RX_PAGE_SIZE(_efx) \
+ (PAGE_SIZE * (1u << (_efx)->rx_buffer_order))
+
+
+/**************************************************************************
+ *
+ * Linux generic LRO handling
+ *
+ **************************************************************************
+ */
+
+/* LRO callback for skb-based receives: locate the IPv4 and TCP headers.
+ *
+ * skb->data is taken to be the start of the IP header.  On an IPv4/TCP
+ * packet the header pointers and flags are filled in, the channel's
+ * rx_alloc_level is credited with RX_ALLOC_FACTOR_LRO and 0 is
+ * returned.  Anything else adds RX_ALLOC_FACTOR_SKB to the level and
+ * returns -1.  @priv is the struct efx_channel.
+ */
+static int efx_lro_get_skb_hdr(struct sk_buff *skb, void **ip_hdr,
+			       void **tcpudp_hdr, u64 *hdr_flags, void *priv)
+{
+	struct efx_channel *chan = (struct efx_channel *)priv;
+	struct iphdr *ip = (struct iphdr *)skb->data;
+
+	if (skb->protocol == htons(ETH_P_IP) && ip->protocol == IPPROTO_TCP) {
+		/* TCP header follows the variable-length IP header */
+		*tcpudp_hdr = (struct tcphdr *)(skb->data + ip->ihl * 4);
+		*ip_hdr = ip;
+		*hdr_flags = LRO_IPV4 | LRO_TCP;
+
+		chan->rx_alloc_level += RX_ALLOC_FACTOR_LRO;
+		return 0;
+	}
+
+	chan->rx_alloc_level += RX_ALLOC_FACTOR_SKB;
+	return -1;
+}
+
+/* LRO callback for page-based receives: locate the MAC, IPv4 and TCP
+ * headers inside a page fragment.
+ *
+ * @frag: fragment whose first byte is the Ethernet header
+ * @mac_hdr: set to the Ethernet header (always, even on failure)
+ * @ip_hdr: set to the IPv4 header once one has been found
+ * @tcpudp_hdr: set to the TCP header on success
+ * @hdr_flags: set to LRO_IPV4 | LRO_TCP on success
+ * @priv: the struct efx_channel the packet arrived on
+ *
+ * Returns 0 for an IPv4/TCP packet carried either directly in an
+ * EtherII frame or inside a single 802.1Q VLAN tag, crediting the
+ * channel's rx_alloc_level with RX_ALLOC_FACTOR_LRO.  Returns -1 for
+ * everything else, adding RX_ALLOC_FACTOR_SKB to the level instead.
+ */
+static int efx_get_frag_hdr(struct skb_frag_struct *frag, void **mac_hdr,
+			    void **ip_hdr, void **tcpudp_hdr, u64 *hdr_flags,
+			    void *priv)
+{
+	struct efx_channel *channel = (struct efx_channel *)priv;
+	struct ethhdr *eh;
+	struct iphdr *iph;
+
+	/* We support EtherII and VLAN encapsulated IPv4 */
+	eh = (struct ethhdr *)(page_address(frag->page) + frag->page_offset);
+	*mac_hdr = eh;
+
+	if (eh->h_proto == htons(ETH_P_IP)) {
+		iph = (struct iphdr *)(eh + 1);
+	} else if (eh->h_proto == htons(ETH_P_8021Q)) {
+		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)eh;
+		if (veh->h_vlan_encapsulated_proto != htons(ETH_P_IP))
+			goto fail;
+
+		iph = (struct iphdr *)(veh + 1);
+	} else {
+		/* Not VLAN tagged: the bytes where a tag's encapsulated
+		 * protocol would sit are payload and must not be
+		 * interpreted as an ethertype.
+		 */
+		goto fail;
+	}
+	*ip_hdr = iph;
+
+	/* We can only do LRO over TCP */
+	if (iph->protocol != IPPROTO_TCP)
+		goto fail;
+
+	*hdr_flags = LRO_IPV4 | LRO_TCP;
+	*tcpudp_hdr = (struct tcphdr *)((u8 *) iph + iph->ihl * 4);
+
+	channel->rx_alloc_level += RX_ALLOC_FACTOR_LRO;
+	return 0;
+ fail:
+	channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB;
+	return -1;
+}
+
+/* Set up a generic-LRO manager for one channel.
+ *
+ * Allocates a zeroed table of EFX_MAX_LRO_DESCRIPTORS descriptors and
+ * fills in the manager's limits, header-parsing callbacks and checksum
+ * settings.  Returns 0 on success or -ENOMEM if the table cannot be
+ * allocated.  The table is released by efx_lro_fini().
+ */
+int efx_lro_init(struct net_lro_mgr *lro_mgr, struct efx_nic *efx)
+{
+	struct net_lro_desc *descs;
+
+	descs = kzalloc(sizeof(struct net_lro_desc) * EFX_MAX_LRO_DESCRIPTORS,
+			GFP_KERNEL);
+	if (!descs)
+		return -ENOMEM;
+
+	/* Descriptor table and aggregation limits */
+	lro_mgr->lro_arr = descs;
+	lro_mgr->max_desc = EFX_MAX_LRO_DESCRIPTORS;
+	lro_mgr->max_aggr = EFX_MAX_LRO_AGGR;
+	lro_mgr->frag_align_pad = EFX_PAGE_SKB_ALIGN;
+
+	/* Device binding and header parsers */
+	lro_mgr->dev = efx->net_dev;
+	lro_mgr->features = LRO_F_NAPI;
+	lro_mgr->get_skb_header = efx_lro_get_skb_hdr;
+	lro_mgr->get_frag_header = efx_get_frag_hdr;
+
+	/* Packets, aggregated or not, go up marked as already verified */
+	lro_mgr->ip_summed = CHECKSUM_UNNECESSARY;
+	lro_mgr->ip_summed_aggr = CHECKSUM_UNNECESSARY;
+
+	return 0;
+}
+
+/* Release the descriptor table allocated by efx_lro_init() and leave
+ * the manager's lro_arr pointer NULL.
+ */
+void efx_lro_fini(struct net_lro_mgr *lro_mgr)
+{
+	struct net_lro_desc *descs = lro_mgr->lro_arr;
+
+	lro_mgr->lro_arr = NULL;
+	kfree(descs);
+}
+
+/**
+ * efx_init_rx_buffer_skb - set up an RX buffer backed by an skb
+ *
+ * @rx_queue: Efx RX queue
+ * @rx_buf: RX buffer structure to populate
+ *
+ * Allocates an skb of the NIC's rx_buffer_len, reserves NET_IP_ALIGN
+ * bytes at its head, maps the remaining data area for DMA from the
+ * device and records skb, data pointer, length and DMA address in
+ * @rx_buf.  Returns 0 on success, -ENOMEM if no skb could be allocated
+ * or -EIO if the DMA mapping failed; in both error cases @rx_buf->skb
+ * is left NULL.
+ */
+static inline int efx_init_rx_buffer_skb(struct efx_rx_queue *rx_queue,
+					 struct efx_rx_buffer *rx_buf)
+{
+	struct efx_nic *efx = rx_queue->efx;
+	int alloc_len = efx->rx_buffer_len;
+	struct sk_buff *skb;
+
+	skb = netdev_alloc_skb(efx->net_dev, alloc_len);
+	rx_buf->skb = skb;
+	if (unlikely(skb == NULL))
+		return -ENOMEM;
+
+	/* Adjust the SKB for padding and checksum */
+	skb_reserve(skb, NET_IP_ALIGN);
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+	rx_buf->len = alloc_len - NET_IP_ALIGN;
+	rx_buf->data = (char *)skb->data;
+
+	rx_buf->dma_addr = pci_map_single(efx->pci_dev,
+					  rx_buf->data, rx_buf->len,
+					  PCI_DMA_FROMDEVICE);
+	if (likely(!pci_dma_mapping_error(rx_buf->dma_addr)))
+		return 0;
+
+	/* Mapping failed: give the skb back */
+	dev_kfree_skb_any(skb);
+	rx_buf->skb = NULL;
+	return -EIO;
+}
+
+/**
+ * efx_init_rx_buffer_page - create new RX buffer using page-based allocation
+ *
+ * @rx_queue: Efx RX queue
+ * @rx_buf: RX buffer structure to populate
+ *
+ * This allocates memory for a new receive buffer, maps it for DMA,
+ * and populates a struct efx_rx_buffer with the relevant
+ * information. Return a negative error code or 0 on success.
+ *
+ * When rx_buffer_order is 0 several buffers may be carved out of one
+ * page: the partially used page is remembered in @rx_queue->buf_page
+ * (with @rx_queue->buf_data / @rx_queue->buf_dma_addr) between calls.
+ * Only the last buffer taken from a page has @rx_buf->unmap_addr set.
+ *
+ * Returns -ENOMEM if no page could be allocated and -EIO if the DMA
+ * mapping failed; @rx_buf->page is NULL in both cases.
+ */
+static inline int efx_init_rx_buffer_page(struct efx_rx_queue *rx_queue,
+ struct efx_rx_buffer *rx_buf)
+{
+ struct efx_nic *efx = rx_queue->efx;
+ int bytes, space, offset;
+
+ bytes = efx->rx_buffer_len - EFX_PAGE_IP_ALIGN;
+
+ /* If there is space left in the previously allocated page,
+ * then use it. Otherwise allocate a new one */
+ rx_buf->page = rx_queue->buf_page;
+ if (rx_buf->page == NULL) {
+ dma_addr_t dma_addr;
+
+ rx_buf->page = alloc_pages(__GFP_COLD | __GFP_COMP | GFP_ATOMIC,
+ efx->rx_buffer_order);
+ if (unlikely(rx_buf->page == NULL))
+ return -ENOMEM;
+
+ /* The whole allocation is mapped once, from offset 0 */
+ dma_addr = pci_map_page(efx->pci_dev, rx_buf->page,
+ 0, RX_PAGE_SIZE(efx),
+ PCI_DMA_FROMDEVICE);
+
+ if (unlikely(pci_dma_mapping_error(dma_addr))) {
+ __free_pages(rx_buf->page, efx->rx_buffer_order);
+ rx_buf->page = NULL;
+ return -EIO;
+ }
+
+ /* Remember the new page; the first buffer starts
+ * EFX_PAGE_IP_ALIGN bytes into it */
+ rx_queue->buf_page = rx_buf->page;
+ rx_queue->buf_dma_addr = dma_addr;
+ rx_queue->buf_data = ((char *) page_address(rx_buf->page) +
+ EFX_PAGE_IP_ALIGN);
+ }
+
+ /* Hand out the buffer at the queue's current position in the page */
+ offset = RX_DATA_OFFSET(rx_queue->buf_data);
+ rx_buf->len = bytes;
+ rx_buf->dma_addr = rx_queue->buf_dma_addr + offset;
+ rx_buf->data = rx_queue->buf_data;
+
+ /* Try to pack multiple buffers per page */
+ if (efx->rx_buffer_order == 0) {
+ /* Advance by the buffer size rounded up to a multiple of
+ * 512 bytes to find where the next buffer would start */
+ rx_queue->buf_data += ((bytes + 0x1ff) & ~0x1ff);
+ offset += ((bytes + 0x1ff) & ~0x1ff);
+
+ space = RX_PAGE_SIZE(efx) - offset;
+ if (space >= bytes) {
+ /* Another buffer fits, so the page stays in
+ * rx_queue->buf_page and gains a reference for it.
+ * Refs dropped on kernel releasing each skb */
+ get_page(rx_queue->buf_page);
+ goto out;
+ }
+ }
+
+ /* This is the final RX buffer for this page, so mark it for
+ * unmapping */
+ rx_queue->buf_page = NULL;
+ rx_buf->unmap_addr = rx_queue->buf_dma_addr;
+
+ out:
+ return 0;
+}
+
+/* Populate @new_rx_buf with freshly allocated, DMA-mapped memory.
+ *
+ * The channel's rx_alloc_push_pages flag selects page-based or
+ * skb-based allocation; the pointer belonging to the other method is
+ * cleared and the matching per-queue allocation counter is bumped
+ * whether or not the allocation succeeds.  Returns the result of the
+ * chosen allocator, logging (rate-limited) when it is negative.
+ */
+static inline int efx_init_rx_buffer(struct efx_rx_queue *rx_queue,
+				     struct efx_rx_buffer *new_rx_buf)
+{
+	int rc;
+
+	if (!rx_queue->channel->rx_alloc_push_pages) {
+		new_rx_buf->page = NULL;
+		rc = efx_init_rx_buffer_skb(rx_queue, new_rx_buf);
+		rx_queue->alloc_skb_count++;
+	} else {
+		new_rx_buf->skb = NULL;
+		rc = efx_init_rx_buffer_page(rx_queue, new_rx_buf);
+		rx_queue->alloc_page_count++;
+	}
+
+	if (likely(rc >= 0))
+		return rc;
+
+	EFX_LOG_RL(rx_queue->efx, "%s RXQ[%d] =%d\n", __func__,
+		   rx_queue->queue, rc);
+	return rc;
+}
+
+/* Undo the DMA mapping behind an RX buffer.
+ *
+ * Page-backed buffers unmap the whole page allocation, and only when
+ * this buffer carries a non-zero unmap_addr (which is then cleared).
+ * skb-backed buffers unmap their own data area.  A buffer with neither
+ * page nor skb is left untouched.
+ */
+static inline void efx_unmap_rx_buffer(struct efx_nic *efx,
+				       struct efx_rx_buffer *rx_buf)
+{
+	if (rx_buf->page) {
+		EFX_BUG_ON_PARANOID(rx_buf->skb);
+		if (!rx_buf->unmap_addr)
+			return;
+
+		pci_unmap_page(efx->pci_dev, rx_buf->unmap_addr,
+			       RX_PAGE_SIZE(efx), PCI_DMA_FROMDEVICE);
+		rx_buf->unmap_addr = 0;
+		return;
+	}
+
+	if (likely(rx_buf->skb))
+		pci_unmap_single(efx->pci_dev, rx_buf->dma_addr,
+				 rx_buf->len, PCI_DMA_FROMDEVICE);
+}
+
+/* Release the memory behind an RX buffer (page reference or skb) and
+ * clear the corresponding pointer.  Does not touch the DMA mapping.
+ */
+static inline void efx_free_rx_buffer(struct efx_nic *efx,
+				      struct efx_rx_buffer *rx_buf)
+{
+	struct page *page = rx_buf->page;
+
+	if (page) {
+		rx_buf->page = NULL;
+		__free_pages(page, efx->rx_buffer_order);
+		return;
+	}
+
+	if (likely(rx_buf->skb)) {
+		dev_kfree_skb_any(rx_buf->skb);
+		rx_buf->skb = NULL;
+	}
+}
+
+/* Fully tear down an RX buffer: unmap it from DMA, then free it */
+static inline void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue,
+				      struct efx_rx_buffer *rx_buf)
+{
+	struct efx_nic *efx = rx_queue->efx;
+
+	efx_unmap_rx_buffer(efx, rx_buf);
+	efx_free_rx_buffer(efx, rx_buf);
+}
+
+/**
+ * __efx_fast_push_rx_descriptors - push new RX descriptors quickly
+ * @rx_queue: RX descriptor queue
+ * @retry: Recheck the fill level
+ *
+ * This will aim to fill the RX descriptor queue up to
+ * @rx_queue->@fast_fill_limit. If there is insufficient atomic
+ * memory to do so, the caller should retry.
+ *
+ * Buffers are added in batches of EFX_RX_BATCH. With @retry non-zero
+ * the fill level is recomputed and the fill repeated after each
+ * successful pass, until fewer than EFX_RX_BATCH slots remain below
+ * the limit.
+ *
+ * Returns 0 if no fill was needed or the fill completed, -1 if the
+ * add lock was contended, or the negative error code from
+ * efx_init_rx_buffer() if a buffer could not be allocated.
+ */
+static int __efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue,
+ int retry)
+{
+ struct efx_rx_buffer *rx_buf;
+ unsigned fill_level, index;
+ int i, space, rc = 0;
+
+ /* Calculate current fill level. Do this outside the lock,
+ * because most of the time we'll end up not wanting to do the
+ * fill anyway.
+ */
+ fill_level = (rx_queue->added_count - rx_queue->removed_count);
+ EFX_BUG_ON_PARANOID(fill_level >
+ rx_queue->efx->type->rxd_ring_mask + 1);
+
+ /* Don't fill if we don't need to */
+ if (fill_level >= rx_queue->fast_fill_trigger)
+ return 0;
+
+ /* Record minimum fill level (a level of zero is not recorded) */
+ if (unlikely(fill_level < rx_queue->min_fill))
+ if (fill_level)
+ rx_queue->min_fill = fill_level;
+
+ /* Acquire RX add lock. If this lock is contended, then a fast
+ * fill must already be in progress (e.g. in the slow-fill work
+ * item, efx_rx_work()), so we don't need to do anything
+ */
+ if (!spin_trylock_bh(&rx_queue->add_lock))
+ return -1;
+
+ /* The label shares its name with the @retry parameter; labels
+ * live in their own namespace so the two do not clash.
+ */
+ retry:
+ /* Recalculate current fill level now that we have the lock */
+ fill_level = (rx_queue->added_count - rx_queue->removed_count);
+ EFX_BUG_ON_PARANOID(fill_level >
+ rx_queue->efx->type->rxd_ring_mask + 1);
+ space = rx_queue->fast_fill_limit - fill_level;
+ if (space < EFX_RX_BATCH)
+ goto out_unlock;
+
+ EFX_TRACE(rx_queue->efx, "RX queue %d fast-filling descriptor ring from"
+ " level %d to level %d using %s allocation\n",
+ rx_queue->queue, fill_level, rx_queue->fast_fill_limit,
+ rx_queue->channel->rx_alloc_push_pages ? "page" : "skb");
+
+ /* Add whole batches while at least one batch still fits. On an
+ * allocation failure jump to "out" so that the buffers already
+ * added are still notified to the hardware.
+ */
+ do {
+ for (i = 0; i < EFX_RX_BATCH; ++i) {
+ index = (rx_queue->added_count &
+ rx_queue->efx->type->rxd_ring_mask);
+ rx_buf = efx_rx_buffer(rx_queue, index);
+ rc = efx_init_rx_buffer(rx_queue, rx_buf);
+ if (unlikely(rc))
+ goto out;
+ ++rx_queue->added_count;
+ }
+ } while ((space -= EFX_RX_BATCH) >= EFX_RX_BATCH);
+
+ EFX_TRACE(rx_queue->efx, "RX queue %d fast-filled descriptor ring "
+ "to level %d\n", rx_queue->queue,
+ rx_queue->added_count - rx_queue->removed_count);
+
+ out:
+ /* Send write pointer to card. */
+ falcon_notify_rx_desc(rx_queue);
+
+ /* If the fast fill is running from the slow-fill work item, then
+ * for SMP systems it may be running on a different CPU to
+ * RX event processing, which means that the fill level may now be
+ * out of date. */
+ if (unlikely(retry && (rc == 0)))
+ goto retry;
+
+ out_unlock:
+ spin_unlock_bh(&rx_queue->add_lock);
+
+ return rc;
+}
+
+/**
+ * efx_fast_push_rx_descriptors - push new RX descriptors quickly
+ * @rx_queue: RX descriptor queue
+ *
+ * Makes a single attempt (no retry) to refill the RX descriptor queue
+ * up to @rx_queue->@fast_fill_limit.  If that attempt reports any
+ * failure, the slow-fill work item is scheduled with no delay so the
+ * fill can continue from there.
+ */
+void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue)
+{
+	if (likely(__efx_fast_push_rx_descriptors(rx_queue, 0) == 0))
+		return;
+
+	/* Schedule the work item to run immediately. The hope is
+	 * that work is immediately pending to free some memory
+	 * (e.g. an RX event or TX completion)
+	 */
+	efx_schedule_slow_fill(rx_queue, 0);
+}
+
+/* Slow-fill work item for an RX queue.
+ *
+ * Does nothing if the queue's channel is disabled.  Otherwise counts
+ * the invocation, retries the descriptor fill with fill-level
+ * rechecking enabled and, if that still fails, reschedules itself with
+ * a delay argument of 1 to give the kernel time to free some memory.
+ */
+void efx_rx_work(struct work_struct *data)
+{
+	struct efx_rx_queue *rx_queue =
+		container_of(data, struct efx_rx_queue, work.work);
+
+	if (likely(rx_queue->channel->enabled)) {
+		EFX_TRACE(rx_queue->efx, "RX queue %d worker thread executing on CPU "
+			  "%d\n", rx_queue->queue, raw_smp_processor_id());
+
+		++rx_queue->slow_fill_count;
+
+		if (__efx_fast_push_rx_descriptors(rx_queue, 1) != 0)
+			efx_schedule_slow_fill(rx_queue, 1);
+	}
+}
+
+/* Check the length reported in an RX event against the buffer posted.
+ *
+ * A length that fits in the buffer (minus the NIC type's
+ * rx_buffer_padding) is accepted silently.  Otherwise *discard is set
+ * and the channel's overlength counter incremented.  If the length
+ * even exceeds the buffer itself and workaround 8071 applies, an RX
+ * recovery reset is scheduled and, for skb-backed buffers, *leak_packet
+ * is set because the skb metadata may have been overwritten.
+ */
+static inline void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue,
+					    struct efx_rx_buffer *rx_buf,
+					    int len, int *discard,
+					    int *leak_packet)
+{
+	struct efx_nic *efx = rx_queue->efx;
+	unsigned max_len = rx_buf->len - efx->type->rx_buffer_padding;
+	int serious;
+
+	if (likely(len <= max_len))
+		return;
+
+	/* The packet must be discarded, but this is only a fatal error
+	 * if the caller indicated it was
+	 */
+	*discard = 1;
+
+	serious = (len > rx_buf->len) && EFX_WORKAROUND_8071(efx);
+	if (!serious) {
+		EFX_ERR_RL(efx, " RX queue %d overlength RX event "
+			   "(0x%x > 0x%x)\n", rx_queue->queue, len, max_len);
+	} else {
+		EFX_ERR_RL(efx, " RX queue %d seriously overlength "
+			   "RX event (0x%x > 0x%x+0x%x). Leaking\n",
+			   rx_queue->queue, len, max_len,
+			   efx->type->rx_buffer_padding);
+		/* If this buffer was skb-allocated, then the meta
+		 * data at the end of the skb will be trashed. So
+		 * we have no choice but to leak the fragment.
+		 */
+		*leak_packet = (rx_buf->skb != NULL);
+		efx_schedule_reset(efx, RESET_TYPE_RX_RECOVERY);
+	}
+
+	rx_queue->channel->n_rx_overlength++;
+}
+
+/* Pass a received packet up through the generic LRO stack.
+ *
+ * Page-backed buffers are handed over as a single fragment, skb-backed
+ * buffers as the skb itself.  Either way the rx_buf's pointer to the
+ * memory is cleared afterwards, and the channel is passed along as the
+ * private pointer for the header-parsing callbacks.
+ */
+static inline void efx_rx_packet_lro(struct efx_channel *channel,
+				     struct efx_rx_buffer *rx_buf)
+{
+	struct net_lro_mgr *lro_mgr = &channel->lro_mgr;
+	struct skb_frag_struct frag;
+
+	if (!rx_buf->page) {
+		EFX_BUG_ON_PARANOID(!rx_buf->skb);
+
+		lro_receive_skb(lro_mgr, rx_buf->skb, channel);
+		rx_buf->skb = NULL;
+		return;
+	}
+
+	/* Describe the packet as one fragment within the page */
+	frag.page = rx_buf->page;
+	frag.page_offset = RX_BUF_OFFSET(rx_buf);
+	frag.size = rx_buf->len;
+
+	lro_receive_frags(lro_mgr, &frag, rx_buf->len,
+			  rx_buf->len, channel, 0);
+
+	EFX_BUG_ON_PARANOID(rx_buf->skb);
+	rx_buf->page = NULL;
+}
+
+/* Allocate and construct an SKB around a struct page.
+ *
+ * The first @hdr_len bytes of the packet are copied into the skb's
+ * linear area; whatever remains is attached as a single page fragment.
+ * If nothing remains, the page reference is dropped instead.
+ *
+ * Returns the new skb with ->protocol set, and clears rx_buf->page. If
+ * the skb cannot be allocated, returns NULL and leaves rx_buf->page
+ * set, so the caller still owns the page.
+ */
+static inline struct sk_buff *efx_rx_mk_skb(struct efx_rx_buffer *rx_buf,
+ struct efx_nic *efx,
+ int hdr_len)
+{
+ struct sk_buff *skb;
+
+ /* Allocate an SKB to store the headers */
+ skb = netdev_alloc_skb(efx->net_dev, hdr_len + EFX_PAGE_SKB_ALIGN);
+ if (unlikely(skb == NULL)) {
+ EFX_ERR_RL(efx, "RX out of memory for skb\n");
+ return NULL;
+ }
+
+ EFX_BUG_ON_PARANOID(skb_shinfo(skb)->nr_frags);
+ EFX_BUG_ON_PARANOID(rx_buf->len < hdr_len);
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb_reserve(skb, EFX_PAGE_SKB_ALIGN);
+
+ /* skb->len covers the whole packet; only hdr_len bytes of it are
+ * copied into the linear area */
+ skb->len = rx_buf->len;
+ skb->truesize = rx_buf->len + sizeof(struct sk_buff);
+ memcpy(skb->data, rx_buf->data, hdr_len);
+ skb->tail += hdr_len;
+
+ /* Append the remaining page onto the frag list.
+ * NOTE(review): any packet longer than hdr_len takes this branch,
+ * so the unlikely() hint looks inverted for typical traffic -
+ * confirm before changing. */
+ if (unlikely(rx_buf->len > hdr_len)) {
+ struct skb_frag_struct *frag = skb_shinfo(skb)->frags;
+ frag->page = rx_buf->page;
+ frag->page_offset = RX_BUF_OFFSET(rx_buf) + hdr_len;
+ frag->size = skb->len - hdr_len;
+ skb_shinfo(skb)->nr_frags = 1;
+ skb->data_len = frag->size;
+ } else {
+ /* Whole packet was copied, so the page is no longer needed */
+ __free_pages(rx_buf->page, efx->rx_buffer_order);
+ skb->data_len = 0;
+ }
+
+ /* Ownership has transferred from the rx_buf to skb */
+ rx_buf->page = NULL;
+
+ /* Move past the ethernet header */
+ skb->protocol = eth_type_trans(skb, efx->net_dev);
+
+ return skb;
+}
+
+/* Handle a received packet. First half: bookkeeping only; the payload
+ * is prefetched but not read here.
+ *
+ * @rx_queue: queue the packet arrived on
+ * @index: slot in the queue's buffer array holding the packet
+ * @len: packet length reported by the RX event
+ * @checksummed: non-zero if the packet is to be treated as checksummed
+ * @discard: non-zero if the caller wants the packet dropped
+ *
+ * Delivery is pipelined by one packet per channel: the packet stored in
+ * channel->rx_pkt by the previous call is passed to __efx_rx_packet()
+ * now, and this packet is stored in its place.
+ */
+void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
+ unsigned int len, int checksummed, int discard)
+{
+ struct efx_nic *efx = rx_queue->efx;
+ struct efx_rx_buffer *rx_buf;
+ int leak_packet = 0;
+
+ rx_buf = efx_rx_buffer(rx_queue, index);
+ EFX_BUG_ON_PARANOID(!rx_buf->data);
+ EFX_BUG_ON_PARANOID(rx_buf->skb && rx_buf->page);
+ EFX_BUG_ON_PARANOID(!(rx_buf->skb || rx_buf->page));
+
+ /* This allows the refill path to post another buffer.
+ * EFX_RXD_HEAD_ROOM ensures that the slot we are using
+ * isn't overwritten yet.
+ */
+ rx_queue->removed_count++;
+
+ /* Validate the length encoded in the event vs the descriptor pushed */
+ efx_rx_packet__check_len(rx_queue, rx_buf, len,
+ &discard, &leak_packet);
+
+ EFX_TRACE(efx, "RX queue %d received id %x at %llx+%x %s%s\n",
+ rx_queue->queue, index,
+ (unsigned long long)rx_buf->dma_addr, len,
+ (checksummed ? " [SUMMED]" : ""),
+ (discard ? " [DISCARD]" : ""));
+
+ /* Discard packet, if instructed to do so */
+ if (unlikely(discard)) {
+ if (unlikely(leak_packet))
+ /* Buffer is deliberately neither unmapped nor freed */
+ rx_queue->channel->n_skbuff_leaks++;
+ else
+ /* We haven't called efx_unmap_rx_buffer yet,
+ * so fini the entire rx_buffer here */
+ efx_fini_rx_buffer(rx_queue, rx_buf);
+ return;
+ }
+
+ /* Release card resources - assumes all RX buffers consumed in-order
+ * per RX queue
+ */
+ efx_unmap_rx_buffer(efx, rx_buf);
+
+ /* Prefetch nice and early so data will (hopefully) be in cache by
+ * the time we look at it.
+ */
+ prefetch(rx_buf->data);
+
+ /* Pipeline receives so that we give time for packet headers to be
+ * prefetched into cache.
+ */
+ rx_buf->len = len;
+ if (rx_queue->channel->rx_pkt)
+ __efx_rx_packet(rx_queue->channel,
+ rx_queue->channel->rx_pkt,
+ rx_queue->channel->rx_pkt_csummed);
+ rx_queue->channel->rx_pkt = rx_buf;
+ rx_queue->channel->rx_pkt_csummed = checksummed;
+}
+
+/* Handle a received packet. Second half: Touches packet payload.
+ *
+ * @channel: channel the packet was received on
+ * @rx_buf: buffer holding the packet; rx_buf->len is the packet length
+ * @checksummed: non-zero if the packet is to be treated as checksummed
+ *
+ * Checksummed packets go to the LRO engine when the net device has
+ * NETIF_F_LRO set. All others are passed to netif_receive_skb(), after
+ * an skb has been built around the page for page-backed buffers. In
+ * every case the rx_buf no longer references the skb or page on return.
+ */
+void __efx_rx_packet(struct efx_channel *channel,
+ struct efx_rx_buffer *rx_buf, int checksummed)
+{
+ struct efx_nic *efx = channel->efx;
+ struct sk_buff *skb;
+ int lro = efx->net_dev->features & NETIF_F_LRO;
+
+ if (rx_buf->skb) {
+ prefetch(skb_shinfo(rx_buf->skb));
+
+ skb_put(rx_buf->skb, rx_buf->len);
+
+ /* Move past the ethernet header. rx_buf->data still points
+ * at the ethernet header */
+ rx_buf->skb->protocol = eth_type_trans(rx_buf->skb,
+ efx->net_dev);
+ }
+
+ /* Both our generic-LRO and SFC-SSR support skb and page based
+ * allocation, but neither support switching from one to the
+ * other on the fly. If we spot that the allocation mode has
+ * changed, then flush the LRO state.
+ */
+ if (unlikely(channel->rx_alloc_pop_pages != (rx_buf->page != NULL))) {
+ efx_flush_lro(channel);
+ channel->rx_alloc_pop_pages = (rx_buf->page != NULL);
+ }
+ if (likely(checksummed && lro)) {
+ efx_rx_packet_lro(channel, rx_buf);
+ goto done;
+ }
+
+ /* Form an skb if required */
+ if (rx_buf->page) {
+ int hdr_len = min(rx_buf->len, EFX_SKB_HEADERS);
+ skb = efx_rx_mk_skb(rx_buf, efx, hdr_len);
+ if (unlikely(skb == NULL)) {
+ /* No skb: the packet is dropped and its page freed */
+ efx_free_rx_buffer(efx, rx_buf);
+ goto done;
+ }
+ } else {
+ /* We now own the SKB */
+ skb = rx_buf->skb;
+ rx_buf->skb = NULL;
+ }
+
+ EFX_BUG_ON_PARANOID(rx_buf->page);
+ EFX_BUG_ON_PARANOID(rx_buf->skb);
+ EFX_BUG_ON_PARANOID(!skb);
+
+ /* Set the SKB flags */
+ if (unlikely(!checksummed || !efx->rx_checksum_enabled))
+ skb->ip_summed = CHECKSUM_NONE;
+
+ /* Pass the packet up */
+ netif_receive_skb(skb);
+
+ /* Update allocation strategy method */
+ channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB;
+
+ /* fall-thru */
+done:
+ efx->net_dev->last_rx = jiffies;
+}
+
+/* Choose the RX buffer allocation method a channel will use for new
+ * buffers and store it in channel->rx_alloc_push_pages.
+ *
+ * Without NETIF_F_LRO on the net device the answer is always skb.
+ * With a fixed rx_alloc_method that method is used.  With
+ * RX_ALLOC_METHOD_AUTO the channel's rx_alloc_level is first clamped
+ * to 0..RX_ALLOC_LEVEL_MAX and pages are chosen when it exceeds
+ * RX_ALLOC_LEVEL_LRO.
+ */
+void efx_rx_strategy(struct efx_channel *channel)
+{
+	enum efx_rx_alloc_method method = rx_alloc_method;
+	int push_pages;
+
+	if (!(channel->efx->net_dev->features & NETIF_F_LRO)) {
+		/* Only makes sense to use page based allocation if LRO
+		 * is enabled */
+		push_pages = 0;
+	} else if (method != RX_ALLOC_METHOD_AUTO) {
+		push_pages = (method == RX_ALLOC_METHOD_PAGE);
+	} else {
+		/* Constrain the rx_alloc_level */
+		if (channel->rx_alloc_level < 0)
+			channel->rx_alloc_level = 0;
+		else if (channel->rx_alloc_level > RX_ALLOC_LEVEL_MAX)
+			channel->rx_alloc_level = RX_ALLOC_LEVEL_MAX;
+
+		/* Decide on the allocation method */
+		push_pages = (channel->rx_alloc_level > RX_ALLOC_LEVEL_LRO);
+	}
+
+	/* Push the option */
+	channel->rx_alloc_push_pages = push_pages;
+}
+
+/* Allocate the software and hardware resources for an RX queue.
+ *
+ * Allocates a zeroed array with one struct efx_rx_buffer per ring entry
+ * and then calls falcon_probe_rx().  Returns 0 on success.  On failure
+ * the array is freed again, the queue is marked unused and the error
+ * code (-ENOMEM or that of falcon_probe_rx()) is returned.
+ */
+int efx_probe_rx_queue(struct efx_rx_queue *rx_queue)
+{
+	struct efx_nic *efx = rx_queue->efx;
+	unsigned int n_bytes;
+	int rc;
+
+	EFX_LOG(efx, "creating RX queue %d\n", rx_queue->queue);
+
+	/* Allocate RX buffers */
+	n_bytes = (efx->type->rxd_ring_mask + 1) * sizeof(*rx_queue->buffer);
+	rx_queue->buffer = kzalloc(n_bytes, GFP_KERNEL);
+	if (rx_queue->buffer == NULL) {
+		rx_queue->used = 0;
+		return -ENOMEM;
+	}
+
+	rc = falcon_probe_rx(rx_queue);
+	if (rc == 0)
+		return 0;
+
+	kfree(rx_queue->buffer);
+	rx_queue->buffer = NULL;
+	rx_queue->used = 0;
+
+	return rc;
+}
+
+/* Reset an RX queue's counters and fill thresholds, then initialise
+ * its descriptor ring via falcon_init_rx().
+ *
+ * The maximum fill is the ring size less EFX_RXD_HEAD_ROOM.  The fast
+ * fill trigger and limit are rx_refill_threshold and rx_refill_limit
+ * percent of that maximum, each percentage capped at 100.  Returns the
+ * result of falcon_init_rx().
+ */
+int efx_init_rx_queue(struct efx_rx_queue *rx_queue)
+{
+	struct efx_nic *efx = rx_queue->efx;
+	unsigned int ring_max, trigger_pct, limit_pct;
+
+	EFX_LOG(rx_queue->efx, "initialising RX queue %d\n", rx_queue->queue);
+
+	/* Initialise ptr fields */
+	rx_queue->added_count = 0;
+	rx_queue->notified_count = 0;
+	rx_queue->removed_count = 0;
+	rx_queue->min_fill = -1U;
+	rx_queue->min_overfill = -1U;
+
+	/* Initialise limit fields */
+	ring_max = efx->type->rxd_ring_mask + 1 - EFX_RXD_HEAD_ROOM;
+	trigger_pct = min(rx_refill_threshold, 100U);
+	limit_pct = min(rx_refill_limit, 100U);
+
+	rx_queue->max_fill = ring_max;
+	rx_queue->fast_fill_trigger = ring_max * trigger_pct / 100U;
+	rx_queue->fast_fill_limit = ring_max * limit_pct / 100U;
+
+	/* Set up RX descriptor ring */
+	return falcon_init_rx(rx_queue);
+}
+
+/* Shut down an RX queue: stop the hardware ring with falcon_fini_rx(),
+ * then unmap and free every buffer in the ring, plus any page that was
+ * only partly split into RX buffers.
+ */
+void efx_fini_rx_queue(struct efx_rx_queue *rx_queue)
+{
+	struct efx_nic *efx = rx_queue->efx;
+	int i;
+
+	EFX_LOG(rx_queue->efx, "shutting down RX queue %d\n", rx_queue->queue);
+
+	falcon_fini_rx(rx_queue);
+
+	/* Release RX buffers NB start at index 0 not current HW ptr */
+	if (rx_queue->buffer) {
+		for (i = 0; i <= efx->type->rxd_ring_mask; i++)
+			efx_fini_rx_buffer(rx_queue,
+					   efx_rx_buffer(rx_queue, i));
+	}
+
+	/* For a page that is part-way through splitting into RX buffers */
+	if (rx_queue->buf_page == NULL)
+		return;
+
+	pci_unmap_page(efx->pci_dev, rx_queue->buf_dma_addr,
+		       RX_PAGE_SIZE(efx), PCI_DMA_FROMDEVICE);
+	__free_pages(rx_queue->buf_page, efx->rx_buffer_order);
+	rx_queue->buf_page = NULL;
+}
+
+/* Destroy an RX queue: release the hardware resources through
+ * falcon_remove_rx(), free the buffer array and mark the queue unused.
+ */
+void efx_remove_rx_queue(struct efx_rx_queue *rx_queue)
+{
+	EFX_LOG(rx_queue->efx, "destroying RX queue %d\n", rx_queue->queue);
+
+	falcon_remove_rx(rx_queue);
+
+	rx_queue->used = 0;
+	kfree(rx_queue->buffer);
+	rx_queue->buffer = NULL;
+}
+
+/* Flush all LRO sessions held by a channel's LRO manager */
+void efx_flush_lro(struct efx_channel *channel)
+{
+	struct net_lro_mgr *lro_mgr = &channel->lro_mgr;
+
+	lro_flush_all(lro_mgr);
+}
+
+
+module_param(rx_alloc_method, int, 0644);
+MODULE_PARM_DESC(rx_alloc_method, "Allocation method used for RX buffers");
+
+module_param(rx_refill_threshold, uint, 0444);
+MODULE_PARM_DESC(rx_refill_threshold,
+ "RX descriptor ring fast/slow fill threshold (%)");
+
diff --git a/drivers/net/sfc/rx.h b/drivers/net/sfc/rx.h
new file mode 100644
index 00000000000..f35e377bfc5
--- /dev/null
+++ b/drivers/net/sfc/rx.h
@@ -0,0 +1,29 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2006 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_RX_H
+#define EFX_RX_H
+
+#include "net_driver.h"
+
+int efx_probe_rx_queue(struct efx_rx_queue *rx_queue);
+void efx_remove_rx_queue(struct efx_rx_queue *rx_queue);
+int efx_init_rx_queue(struct efx_rx_queue *rx_queue);
+void efx_fini_rx_queue(struct efx_rx_queue *rx_queue);
+
+int efx_lro_init(struct net_lro_mgr *lro_mgr, struct efx_nic *efx);
+void efx_lro_fini(struct net_lro_mgr *lro_mgr);
+void efx_flush_lro(struct efx_channel *channel);
+void efx_rx_strategy(struct efx_channel *channel);
+void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue);
+void efx_rx_work(struct work_struct *data);
+void __efx_rx_packet(struct efx_channel *channel,
+ struct efx_rx_buffer *rx_buf, int checksummed);
+
+#endif /* EFX_RX_H */
diff --git a/drivers/net/sfc/sfe4001.c b/drivers/net/sfc/sfe4001.c
new file mode 100644
index 00000000000..11fa9fb8f48
--- /dev/null
+++ b/drivers/net/sfc/sfe4001.c
@@ -0,0 +1,252 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2007 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+/*****************************************************************************
+ * Support for the SFE4001 NIC: driver code for the PCA9539 I/O expander that
+ * controls the PHY power rails, and for the MAX6647 temp. sensor used to check
+ * the PHY
+ */
+#include <linux/delay.h>
+#include "efx.h"
+#include "phy.h"
+#include "boards.h"
+#include "falcon.h"
+#include "falcon_hwdefs.h"
+#include "mac.h"
+
+/**************************************************************************
+ *
+ * I2C IO Expander device
+ *
+ **************************************************************************/
+/* Device identifier passed to efx_i2c_read()/efx_i2c_write() for the IO
+ * expander (presumably its I2C bus address - TODO confirm). */
+#define PCA9539 0x74
+
+/* Port 0 registers. sfe4001_poweron() writes 0x00 to P0_CONFIG to enable the
+ * port's outputs; the power-off paths write 0xff to disable them. */
+#define P0_IN 0x00
+#define P0_OUT 0x02
+#define P0_INVERT 0x04
+#define P0_CONFIG 0x06
+
+/* Port 0 bit positions (LBN) and field widths. As used in this file the
+ * rail enables are active low: sfe4001_poweron() clears a P0_EN_* bit in
+ * P0_OUT to turn a rail on, and writing 0xff turns every rail off. */
+#define P0_EN_1V0X_LBN 0
+#define P0_EN_1V0X_WIDTH 1
+#define P0_EN_1V2_LBN 1
+#define P0_EN_1V2_WIDTH 1
+#define P0_EN_2V5_LBN 2
+#define P0_EN_2V5_WIDTH 1
+#define P0_EN_3V3X_LBN 3
+#define P0_EN_3V3X_WIDTH 1
+#define P0_EN_5V_LBN 4
+#define P0_EN_5V_WIDTH 1
+#define P0_SHORTEN_JTAG_LBN 5
+#define P0_SHORTEN_JTAG_WIDTH 1
+#define P0_X_TRST_LBN 6
+#define P0_X_TRST_WIDTH 1
+#define P0_DSP_RESET_LBN 7
+#define P0_DSP_RESET_WIDTH 1
+
+/* Port 1 registers. P1_IN is polled by sfe4001_poweron() to check that the
+ * DSP is powered. */
+#define P1_IN 0x01
+#define P1_OUT 0x03
+#define P1_INVERT 0x05
+#define P1_CONFIG 0x07
+
+/* Port 1 bit positions (LBN) and field widths */
+#define P1_AFE_PWD_LBN 0
+#define P1_AFE_PWD_WIDTH 1
+#define P1_DSP_PWD25_LBN 1
+#define P1_DSP_PWD25_WIDTH 1
+#define P1_RESERVED_LBN 2
+#define P1_RESERVED_WIDTH 2
+#define P1_SPARE_LBN 4
+#define P1_SPARE_WIDTH 4
+
+
+/**************************************************************************
+ *
+ * Temperature Sensor
+ *
+ **************************************************************************/
+/* Device identifier passed to efx_i2c_read()/efx_i2c_write() for the
+ * temperature sensor (presumably its I2C bus address - TODO confirm). */
+#define MAX6647 0x4e
+
+/* Sensor register numbers. Only three are used in this file: WLHO (written
+ * with the alert threshold), RLHN (read back to verify that write) and RSL
+ * (read to clear a pending over-temperature alert). */
+#define RLTS 0x00
+#define RLTE 0x01
+#define RSL 0x02
+#define RCL 0x03
+#define RCRA 0x04
+#define RLHN 0x05
+#define RLLI 0x06
+#define RRHI 0x07
+#define RRLS 0x08
+#define WCRW 0x0a
+#define WLHO 0x0b
+#define WRHA 0x0c
+#define WRLN 0x0e
+#define OSHT 0x0f
+#define REET 0x10
+#define RIET 0x11
+#define RWOE 0x19
+#define RWOI 0x20
+#define HYS 0x21
+#define QUEUE 0x22
+#define MFID 0xfe
+#define REVID 0xff
+
+/* Status bits */
+#define MAX6647_BUSY (1 << 7) /* ADC is converting */
+#define MAX6647_LHIGH (1 << 6) /* Local high temp. alarm */
+#define MAX6647_LLOW (1 << 5) /* Local low temp. alarm */
+#define MAX6647_RHIGH (1 << 4) /* Remote high temp. alarm */
+#define MAX6647_RLOW (1 << 3) /* Remote low temp. alarm */
+#define MAX6647_FAULT (1 << 2) /* DXN/DXP short/open circuit */
+#define MAX6647_EOT (1 << 1) /* Remote junction overtemp. */
+#define MAX6647_IOT (1 << 0) /* Local junction overtemp. */
+
+/* Over-temperature alert threshold in degrees C; sfe4001_poweron() writes it
+ * to WLHO and logs it as the "DSP cut-out" temperature. */
+static const u8 xgphy_max_temperature = 90;
+
+/* Power down the PHY: switch off every power rail, disable the outputs of
+ * both IO expander ports, then read the sensor status register to clear any
+ * over-temperature alert. All I2C results are deliberately ignored.
+ */
+void sfe4001_poweroff(struct efx_nic *efx)
+{
+	struct efx_i2c_interface *i2c = &efx->i2c;
+	u8 all_ones = 0xff;
+	u8 status;
+
+	EFX_INFO(efx, "%s\n", __func__);
+
+	/* All rails off */
+	(void) efx_i2c_write(i2c, PCA9539, P0_OUT, &all_ones, 1);
+
+	/* Port 1 outputs disabled, then port 0 */
+	(void) efx_i2c_write(i2c, PCA9539, P1_CONFIG, &all_ones, 1);
+	(void) efx_i2c_write(i2c, PCA9539, P0_CONFIG, &all_ones, 1);
+
+	/* Reading the status register clears any over-temperature alert */
+	(void) efx_i2c_read(i2c, MAX6647, RSL, &status, 1);
+}
+
+/* This board uses an I2C expander to provide power to the PHY, which needs to
+ * be turned on before the PHY can be used.
+ *
+ * Sequence: hold the XGXS/XAUI SerDes in reset, program and verify the
+ * over-temperature threshold, enable the IO expander outputs, force all
+ * rails off for one second, then bring the rails up and poll (up to 20
+ * attempts, one second apart) until the expander reports the DSP as powered.
+ *
+ * Returns 0 on success, -EFAULT if the threshold read-back does not match,
+ * -ETIMEDOUT if power never comes up, or the error from a failed I2C
+ * transfer. On failure after the expander has been touched, the rails are
+ * switched off and the expander outputs disabled again.
+ *
+ * Context: Process context, rtnl lock held
+ */
+int sfe4001_poweron(struct efx_nic *efx)
+{
+ struct efx_i2c_interface *i2c = &efx->i2c;
+ unsigned int count;
+ int rc;
+ u8 out, in, cfg;
+ efx_dword_t reg;
+
+ /* 10Xpress has fixed-function LED pins, so there is no board-specific
+ * blink code. */
+ efx->board_info.blink = tenxpress_phy_blink;
+
+ /* Ensure that XGXS and XAUI SerDes are held in reset */
+ EFX_POPULATE_DWORD_7(reg, XX_PWRDNA_EN, 1,
+ XX_PWRDNB_EN, 1,
+ XX_RSTPLLAB_EN, 1,
+ XX_RESETA_EN, 1,
+ XX_RESETB_EN, 1,
+ XX_RSTXGXSRX_EN, 1,
+ XX_RSTXGXSTX_EN, 1);
+ falcon_xmac_writel(efx, &reg, XX_PWR_RST_REG_MAC);
+ udelay(10);
+
+ /* Set DSP over-temperature alert threshold */
+ EFX_INFO(efx, "DSP cut-out at %dC\n", xgphy_max_temperature);
+ rc = efx_i2c_write(i2c, MAX6647, WLHO,
+ &xgphy_max_temperature, 1);
+ if (rc)
+ goto fail1;
+
+ /* Read it back and verify */
+ rc = efx_i2c_read(i2c, MAX6647, RLHN, &in, 1);
+ if (rc)
+ goto fail1;
+ if (in != xgphy_max_temperature) {
+ rc = -EFAULT;
+ goto fail1;
+ }
+
+ /* Clear any previous over-temperature alert */
+ rc = efx_i2c_read(i2c, MAX6647, RSL, &in, 1);
+ if (rc)
+ goto fail1;
+
+ /* Enable port 0 and port 1 outputs on IO expander */
+ cfg = 0x00;
+ rc = efx_i2c_write(i2c, PCA9539, P0_CONFIG, &cfg, 1);
+ if (rc)
+ goto fail1;
+ /* Port 1: only the lowest spare bit has its config bit cleared */
+ cfg = 0xff & ~(1 << P1_SPARE_LBN);
+ rc = efx_i2c_write(i2c, PCA9539, P1_CONFIG, &cfg, 1);
+ if (rc)
+ goto fail2;
+
+ /* Turn all power off then wait 1 sec. This ensures PHY is reset */
+ /* Every term below shifts 0, so the value written is 0xff (all
+ * rail-enable bits set, i.e. rails off). */
+ out = 0xff & ~((0 << P0_EN_1V2_LBN) | (0 << P0_EN_2V5_LBN) |
+ (0 << P0_EN_3V3X_LBN) | (0 << P0_EN_5V_LBN) |
+ (0 << P0_EN_1V0X_LBN));
+ rc = efx_i2c_write(i2c, PCA9539, P0_OUT, &out, 1);
+ if (rc)
+ goto fail3;
+
+ schedule_timeout_uninterruptible(HZ);
+ count = 0;
+ do {
+ /* Turn on 1.2V, 2.5V, 3.3V and 5V power rails */
+ /* (the X_TRST bit is cleared in the same write) */
+ out = 0xff & ~((1 << P0_EN_1V2_LBN) | (1 << P0_EN_2V5_LBN) |
+ (1 << P0_EN_3V3X_LBN) | (1 << P0_EN_5V_LBN) |
+ (1 << P0_X_TRST_LBN));
+
+ rc = efx_i2c_write(i2c, PCA9539, P0_OUT, &out, 1);
+ if (rc)
+ goto fail3;
+ msleep(10);
+
+ /* Turn on 1V power rail */
+ out &= ~(1 << P0_EN_1V0X_LBN);
+ rc = efx_i2c_write(i2c, PCA9539, P0_OUT, &out, 1);
+ if (rc)
+ goto fail3;
+
+ EFX_INFO(efx, "waiting for power (attempt %d)...\n", count);
+
+ schedule_timeout_uninterruptible(HZ);
+
+ /* Check DSP is powered */
+ rc = efx_i2c_read(i2c, PCA9539, P1_IN, &in, 1);
+ if (rc)
+ goto fail3;
+ if (in & (1 << P1_AFE_PWD_LBN))
+ goto done;
+
+ } while (++count < 20);
+
+ EFX_INFO(efx, "timed out waiting for power\n");
+ rc = -ETIMEDOUT;
+ goto fail3;
+
+done:
+ EFX_INFO(efx, "PHY is powered on\n");
+ return 0;
+
+ /* Error unwinding: each label undoes one more step than the next */
+fail3:
+ /* Turn off all power rails */
+ out = 0xff;
+ (void) efx_i2c_write(i2c, PCA9539, P0_OUT, &out, 1);
+ /* Disable port 1 outputs on IO expander */
+ out = 0xff;
+ (void) efx_i2c_write(i2c, PCA9539, P1_CONFIG, &out, 1);
+fail2:
+ /* Disable port 0 outputs on IO expander */
+ out = 0xff;
+ (void) efx_i2c_write(i2c, PCA9539, P0_CONFIG, &out, 1);
+fail1:
+ return rc;
+}
diff --git a/drivers/net/sfc/spi.h b/drivers/net/sfc/spi.h
new file mode 100644
index 00000000000..34412f3d41c
--- /dev/null
+++ b/drivers/net/sfc/spi.h
@@ -0,0 +1,71 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2005 Fen Systems Ltd.
+ * Copyright 2006 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_SPI_H
+#define EFX_SPI_H
+
+#include "net_driver.h"
+
+/**************************************************************************
+ *
+ * Basic SPI command set and bit definitions
+ *
+ *************************************************************************/
+
+/*
+ * Commands common to all known devices.
+ *
+ */
+
+/* Write status register */
+#define SPI_WRSR 0x01
+
+/* Write data to memory array */
+#define SPI_WRITE 0x02
+
+/* Read data from memory array */
+#define SPI_READ 0x03
+
+/* Reset write enable latch */
+#define SPI_WRDI 0x04
+
+/* Read status register */
+#define SPI_RDSR 0x05
+
+/* Set write enable latch */
+#define SPI_WREN 0x06
+
+/* SST: Enable write to status register */
+#define SPI_SST_EWSR 0x50
+
+/*
+ * Status register bits. Not all bits are supported on all devices.
+ *
+ */
+
+/* Write-protect pin enabled */
+#define SPI_STATUS_WPEN 0x80
+
+/* Block protection bit 2 */
+#define SPI_STATUS_BP2 0x10
+
+/* Block protection bit 1 */
+#define SPI_STATUS_BP1 0x08
+
+/* Block protection bit 0 */
+#define SPI_STATUS_BP0 0x04
+
+/* State of the write enable latch */
+#define SPI_STATUS_WEN 0x02
+
+/* Device busy flag */
+#define SPI_STATUS_NRDY 0x01
+
+#endif /* EFX_SPI_H */
diff --git a/drivers/net/sfc/tenxpress.c b/drivers/net/sfc/tenxpress.c
new file mode 100644
index 00000000000..a2e9f79e47b
--- /dev/null
+++ b/drivers/net/sfc/tenxpress.c
@@ -0,0 +1,434 @@
+/****************************************************************************
+ * Driver for Solarflare 802.3an compliant PHY
+ * Copyright 2007 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include "efx.h"
+#include "gmii.h"
+#include "mdio_10g.h"
+#include "falcon.h"
+#include "phy.h"
+#include "falcon_hwdefs.h"
+#include "boards.h"
+#include "mac.h"
+
+/* We expect these MMDs to be in the package */
+/* AN not here as mdio_check_mmds() requires STAT2 support */
+#define TENXPRESS_REQUIRED_DEVS (MDIO_MMDREG_DEVS0_PMAPMD | \
+ MDIO_MMDREG_DEVS0_PCS | \
+ MDIO_MMDREG_DEVS0_PHYXS)
+
+/* We complain if we fail to see the link partner as 10G capable this many
+ * times in a row (must be > 1 as sampling the autoneg. registers is racy)
+ */
+#define MAX_BAD_LP_TRIES (5)
+
+/* Extended control register */
+#define PMA_PMD_XCONTROL_REG 0xc000
+#define PMA_PMD_LNPGA_POWERDOWN_LBN 8
+#define PMA_PMD_LNPGA_POWERDOWN_WIDTH 1
+
+/* extended status register */
+#define PMA_PMD_XSTATUS_REG 0xc001
+#define PMA_PMD_XSTAT_FLP_LBN (12)
+
+/* LED control register */
+#define PMA_PMD_LED_CTRL_REG (0xc007)
+#define PMA_PMA_LED_ACTIVITY_LBN (3)
+
+/* LED function override register */
+#define PMA_PMD_LED_OVERR_REG (0xc009)
+/* Bit positions for different LEDs (there are more but not wired on SFE4001)*/
+#define PMA_PMD_LED_LINK_LBN (0)
+#define PMA_PMD_LED_SPEED_LBN (2)
+#define PMA_PMD_LED_TX_LBN (4)
+#define PMA_PMD_LED_RX_LBN (6)
+/* Override settings */
+#define PMA_PMD_LED_AUTO (0) /* H/W control */
+#define PMA_PMD_LED_ON (1)
+#define PMA_PMD_LED_OFF (2)
+#define PMA_PMD_LED_FLASH (3)
+/* All LEDs under hardware control */
+#define PMA_PMD_LED_FULL_AUTO (0)
+/* Green and Amber under hardware control, Red off */
+#define PMA_PMD_LED_DEFAULT (PMA_PMD_LED_OFF << PMA_PMD_LED_RX_LBN)
+
+
+/* Self test (BIST) control register */
+#define PMA_PMD_BIST_CTRL_REG (0xc014)
+#define PMA_PMD_BIST_BER_LBN (2) /* Run BER test */
+#define PMA_PMD_BIST_CONT_LBN (1) /* Run continuous BIST until cleared */
+#define PMA_PMD_BIST_SINGLE_LBN (0) /* Run 1 BIST iteration (self clears) */
+/* Self test status register */
+#define PMA_PMD_BIST_STAT_REG (0xc015)
+#define PMA_PMD_BIST_ENX_LBN (3)
+#define PMA_PMD_BIST_PMA_LBN (2)
+#define PMA_PMD_BIST_RXD_LBN (1)
+#define PMA_PMD_BIST_AFE_LBN (0)
+
+#define BIST_MAX_DELAY (1000)
+#define BIST_POLL_DELAY (10)
+
+/* Misc register defines */
+#define PCS_CLOCK_CTRL_REG 0xd801
+#define PLL312_RST_N_LBN 2
+
+#define PCS_SOFT_RST2_REG 0xd806
+#define SERDES_RST_N_LBN 13
+#define XGXS_RST_N_LBN 12
+
+#define PCS_TEST_SELECT_REG 0xd807 /* PRM 10.5.8 */
+#define CLK312_EN_LBN 3
+
+/* Boot status register */
+#define PCS_BOOT_STATUS_REG (0xd000)
+#define PCS_BOOT_FATAL_ERR_LBN (0)
+#define PCS_BOOT_PROGRESS_LBN (1)
+#define PCS_BOOT_PROGRESS_WIDTH (2)
+#define PCS_BOOT_COMPLETE_LBN (3)
+#define PCS_BOOT_MAX_DELAY (100)
+#define PCS_BOOT_POLL_DELAY (10)
+
+/* Time to wait between powering down the LNPGA and turning off the power
+ * rails */
+#define LNPGA_PDOWN_WAIT (HZ / 5)
+
+static int crc_error_reset_threshold = 100;
+module_param(crc_error_reset_threshold, int, 0644);
+MODULE_PARM_DESC(crc_error_reset_threshold,
+ "Max number of CRC errors before XAUI reset");
+
+/* Per-NIC private PHY state, allocated in tenxpress_phy_init() and stored
+ * in efx->phy_data. */
+struct tenxpress_phy_data {
+ /* Current PHY state; written by tenxpress_set_state() */
+ enum tenxpress_state state;
+ /* CRC errors counted by tenxpress_crc_err(); tenxpress_phy_check_hw()
+ * resets XAUI and zeroes this once it exceeds
+ * crc_error_reset_threshold */
+ atomic_t bad_crc_count;
+ /* Consecutive polls on which the link partner did not look 10G
+ * capable; see tenxpress_set_bad_lp() */
+ int bad_lp_tries;
+};
+
+/* Return non-zero if PHY private data exists and records the given state */
+static int tenxpress_state_is(struct efx_nic *efx, int state)
+{
+	struct tenxpress_phy_data *pd = efx->phy_data;
+
+	if (pd == NULL)
+		return 0;
+
+	return pd->state == state;
+}
+
+/* Record a new PHY state; does nothing if there is no PHY private data */
+void tenxpress_set_state(struct efx_nic *efx,
+			 enum tenxpress_state state)
+{
+	struct tenxpress_phy_data *pd = efx->phy_data;
+
+	if (pd == NULL)
+		return;
+
+	pd->state = state;
+}
+
+/* Count one CRC error against the PHY; does nothing if there is no PHY
+ * private data */
+void tenxpress_crc_err(struct efx_nic *efx)
+{
+	struct tenxpress_phy_data *pd = efx->phy_data;
+
+	if (pd == NULL)
+		return;
+
+	atomic_inc(&pd->bad_crc_count);
+}
+
+/* Check that the C166 has booted successfully.
+ *
+ * Polls the PCS boot status register until the boot-complete bit is seen.
+ * Returns 0 on success or -ETIMEDOUT once all polls have been used up.
+ */
+static int tenxpress_phy_check(struct efx_nic *efx)
+{
+	int phy_id = efx->mii.phy_id;
+	int polls_left;
+	int boot_stat;
+
+	for (polls_left = PCS_BOOT_MAX_DELAY / PCS_BOOT_POLL_DELAY;
+	     polls_left;
+	     polls_left--) {
+		boot_stat = mdio_clause45_read(efx, phy_id,
+					       MDIO_MMD_PCS,
+					       PCS_BOOT_STATUS_REG);
+		if (boot_stat & (1 << PCS_BOOT_COMPLETE_LBN))
+			return 0;
+		udelay(PCS_BOOT_POLL_DELAY);
+	}
+
+	/* Ran out of polls: report the last boot progress field seen */
+	EFX_ERR(efx, "%s: PHY boot timed out. Last status "
+		"%x\n", __func__,
+		(boot_stat >> PCS_BOOT_PROGRESS_LBN) &
+		((1 << PCS_BOOT_PROGRESS_WIDTH) - 1));
+	return -ETIMEDOUT;
+}
+
+static void tenxpress_reset_xaui(struct efx_nic *efx);
+
+/* Basic PHY bring-up: enable the clock, wait for the PHY to finish booting,
+ * then put the LEDs into their default configuration.
+ * Returns the result of tenxpress_phy_check().
+ */
+static int tenxpress_init(struct efx_nic *efx)
+{
+	int phy_id = efx->mii.phy_id;
+	int led_ctrl;
+	int rc;
+
+	/* Turn on the clock */
+	mdio_clause45_write(efx, phy_id, MDIO_MMD_PCS,
+			    PCS_TEST_SELECT_REG, (1 << CLK312_EN_LBN));
+
+	rc = tenxpress_phy_check(efx);
+	if (rc < 0)
+		return rc;
+
+	/* LED setup: Green = Link, Amber = Link/Act, Red = Off */
+	led_ctrl = mdio_clause45_read(efx, phy_id,
+				      MDIO_MMD_PMAPMD, PMA_PMD_LED_CTRL_REG);
+	led_ctrl |= (1 << PMA_PMA_LED_ACTIVITY_LBN);
+	mdio_clause45_write(efx, phy_id, MDIO_MMD_PMAPMD,
+			    PMA_PMD_LED_CTRL_REG, led_ctrl);
+
+	mdio_clause45_write(efx, phy_id, MDIO_MMD_PMAPMD,
+			    PMA_PMD_LED_OVERR_REG, PMA_PMD_LED_DEFAULT);
+
+	return rc;
+}
+
+/* Allocate the PHY private data and bring the PHY up.
+ *
+ * Returns 0 on success, -ENOMEM if the private data cannot be allocated, or
+ * the negative error from the MMD reset/check or from tenxpress_init(). On
+ * any failure efx->phy_data is left NULL.
+ */
+static int tenxpress_phy_init(struct efx_nic *efx)
+{
+	struct tenxpress_phy_data *phy_data;
+	int rc;
+
+	/* Fail cleanly on allocation failure: later code such as
+	 * tenxpress_set_bad_lp() dereferences efx->phy_data unconditionally.
+	 */
+	phy_data = kzalloc(sizeof(*phy_data), GFP_KERNEL);
+	if (!phy_data)
+		return -ENOMEM;
+	efx->phy_data = phy_data;
+
+	tenxpress_set_state(efx, TENXPRESS_STATUS_NORMAL);
+
+	rc = mdio_clause45_wait_reset_mmds(efx,
+					   TENXPRESS_REQUIRED_DEVS);
+	if (rc < 0)
+		goto fail;
+
+	rc = mdio_clause45_check_mmds(efx, TENXPRESS_REQUIRED_DEVS, 0);
+	if (rc < 0)
+		goto fail;
+
+	rc = tenxpress_init(efx);
+	if (rc < 0)
+		goto fail;
+
+	schedule_timeout_uninterruptible(HZ / 5); /* 200ms */
+
+	/* Let XGXS and SerDes out of reset and resets 10XPress */
+	falcon_reset_xaui(efx);
+
+	return 0;
+
+ fail:
+	kfree(efx->phy_data);
+	efx->phy_data = NULL;
+	return rc;
+}
+
+/* Track whether the link partner looks 10G capable and drive the LED whose
+ * override field sits at PMA_PMD_LED_RX_LBN accordingly.
+ *
+ * @bad_lp: non-zero if this poll found a link partner that does not look
+ * 10G capable
+ *
+ * After MAX_BAD_LP_TRIES consecutive bad polls the LED is set flashing, an
+ * error is logged and the count restarts. efx->phy_data is dereferenced
+ * without a NULL check.
+ */
+static void tenxpress_set_bad_lp(struct efx_nic *efx, int bad_lp)
+{
+ struct tenxpress_phy_data *pd = efx->phy_data;
+ int reg;
+
+ /* Nothing to do if all is well and was previously so. */
+ if (!(bad_lp || pd->bad_lp_tries))
+ return;
+
+ reg = mdio_clause45_read(efx, efx->mii.phy_id,
+ MDIO_MMD_PMAPMD, PMA_PMD_LED_OVERR_REG);
+
+ if (bad_lp)
+ pd->bad_lp_tries++;
+ else
+ pd->bad_lp_tries = 0;
+
+ if (pd->bad_lp_tries == MAX_BAD_LP_TRIES) {
+ pd->bad_lp_tries = 0; /* Restart count */
+ /* Clear the override field, then set it to FLASH */
+ reg &= ~(PMA_PMD_LED_FLASH << PMA_PMD_LED_RX_LBN);
+ reg |= (PMA_PMD_LED_FLASH << PMA_PMD_LED_RX_LBN);
+ EFX_ERR(efx, "This NIC appears to be plugged into"
+ " a port that is not 10GBASE-T capable.\n"
+ " This PHY is 10GBASE-T ONLY, so no link can"
+ " be established.\n");
+ } else {
+ /* NOTE(review): the field is not cleared before OFF (2) is
+ * OR-ed in, so a field already holding FLASH (3) stays at
+ * FLASH - confirm whether that is intended. */
+ reg |= (PMA_PMD_LED_OFF << PMA_PMD_LED_RX_LBN);
+ }
+ mdio_clause45_write(efx, efx->mii.phy_id, MDIO_MMD_PMAPMD,
+ PMA_PMD_LED_OVERR_REG, reg);
+}
+
+/* Report whether the link is up on all required MMDs.
+ *
+ * When the link is down and @check_lp is set, also look at the autoneg
+ * state to work out whether we seem to be plugged into a port that cannot do
+ * 10GBASE-T, and tell tenxpress_set_bad_lp() so that the user can be warned.
+ * Callers pass check_lp == 0 where that check is not useful (e.g. loopback).
+ */
+static int tenxpress_link_ok(struct efx_nic *efx, int check_lp)
+{
+	int ok = mdio_clause45_links_ok(efx, TENXPRESS_REQUIRED_DEVS);
+	int phy_id, an_stat, xphy_stat, bad_lp;
+
+	if (ok) {
+		tenxpress_set_bad_lp(efx, 0);
+		return ok;
+	}
+	if (!check_lp)
+		return ok;
+
+	/* Link is down: are we plugged into the wrong sort of link? */
+	phy_id = efx->mii.phy_id;
+	an_stat = mdio_clause45_read(efx, phy_id, MDIO_MMD_AN,
+				     MDIO_AN_STATUS);
+	xphy_stat = mdio_clause45_read(efx, phy_id,
+				       MDIO_MMD_PMAPMD,
+				       PMA_PMD_XSTATUS_REG);
+
+	/* Without FLPs from the far end we cannot tell "unplugged" apart
+	 * from "plugged into a non-AN antique". The FLP bit has the
+	 * advantage of not clearing when autoneg restarts. */
+	if (!(xphy_stat & (1 << PMA_PMD_XSTAT_FLP_LBN))) {
+		tenxpress_set_bad_lp(efx, 0);
+		return ok;
+	}
+
+	/* A 10GBT-capable partner must be XNP capable */
+	bad_lp = !(an_stat & (1 << MDIO_AN_STATUS_XNP_LBN));
+	if (!bad_lp && (an_stat & (1 << MDIO_AN_STATUS_PAGE_LBN))) {
+		int lp_stat = mdio_clause45_read(efx, phy_id, MDIO_MMD_AN,
+						 MDIO_AN_10GBT_STATUS);
+
+		bad_lp = !(lp_stat & (1 << MDIO_AN_10GBT_STATUS_LP_10G_LBN));
+	}
+	tenxpress_set_bad_lp(efx, bad_lp);
+
+	return ok;
+}
+
+/* Refresh efx->link_up and efx->link_options; only acts while the PHY is in
+ * the normal state */
+static void tenxpress_phy_reconfigure(struct efx_nic *efx)
+{
+	if (tenxpress_state_is(efx, TENXPRESS_STATUS_NORMAL)) {
+		/* No link-partner check on this path */
+		efx->link_up = tenxpress_link_ok(efx, 0);
+		efx->link_options = GM_LPA_10000FULL;
+	}
+}
+
+/* PHY clear_interrupt operation: intentionally empty, because the PHY is
+ * polled (see tenxpress_phy_check_hw()) rather than interrupt driven. */
+static void tenxpress_phy_clear_interrupt(struct efx_nic *efx)
+{
+ /* Nothing done here - LASI interrupts aren't reliable so poll */
+}
+
+
+/* Poll the PHY in place of an interrupt.
+ *
+ * Raises a simulated PHY event when the observed link state differs from
+ * efx->link_up, and resets XAUI once too many CRC errors have been counted.
+ * Always returns 0.
+ */
+static int tenxpress_phy_check_hw(struct efx_nic *efx)
+{
+	struct tenxpress_phy_data *phy_data = efx->phy_data;
+	int phy_up = tenxpress_state_is(efx, TENXPRESS_STATUS_NORMAL);
+	int link_ok = 0;
+
+	/* Only query the link (with link-partner check) while the PHY is up */
+	if (phy_up)
+		link_ok = !!tenxpress_link_ok(efx, 1);
+
+	if (link_ok != efx->link_up)
+		falcon_xmac_sim_phy_event(efx);
+
+	/* Nothing further to check once the PHY has been shut down */
+	if (!phy_up)
+		return 0;
+
+	if (atomic_read(&phy_data->bad_crc_count) > crc_error_reset_threshold) {
+		EFX_ERR(efx, "Resetting XAUI due to too many CRC errors\n");
+		falcon_reset_xaui(efx);
+		atomic_set(&phy_data->bad_crc_count, 0);
+	}
+
+	return 0;
+}
+
+/* Shut the PHY down: power down the LNPGA, wait for that to take effect,
+ * then free the PHY private data.
+ */
+static void tenxpress_phy_fini(struct efx_nic *efx)
+{
+	int xcontrol = (1 << PMA_PMD_LNPGA_POWERDOWN_LBN);
+
+	/* Power down the LNPGA */
+	mdio_clause45_write(efx, efx->mii.phy_id, MDIO_MMD_PMAPMD,
+			    PMA_PMD_XCONTROL_REG, xcontrol);
+
+	/* The board fini may cut power to the PHY; sleeping here makes sure
+	 * it cannot run before the LNPGA powerdown has had long enough to
+	 * complete. */
+	schedule_timeout_uninterruptible(LNPGA_PDOWN_WAIT); /* 200 ms */
+
+	kfree(efx->phy_data);
+	efx->phy_data = NULL;
+}
+
+
+/* Start or stop identification blinking.
+ *
+ * With @blink set, the TX, RX and Link LEDs are forced to flash; otherwise
+ * the default override value is restored. The other LEDs (which probably
+ * aren't wired anyway) are left in AUTO mode.
+ */
+void tenxpress_phy_blink(struct efx_nic *efx, int blink)
+{
+	int override = PMA_PMD_LED_DEFAULT;
+
+	if (blink) {
+		override = (PMA_PMD_LED_FLASH << PMA_PMD_LED_TX_LBN) |
+			   (PMA_PMD_LED_FLASH << PMA_PMD_LED_RX_LBN) |
+			   (PMA_PMD_LED_FLASH << PMA_PMD_LED_LINK_LBN);
+	}
+
+	mdio_clause45_write(efx, efx->mii.phy_id, MDIO_MMD_PMAPMD,
+			    PMA_PMD_LED_OVERR_REG, override);
+}
+
+/* Reset the PHY side of the XAUI link.
+ *
+ * Reads the three PCS control registers, asserts reset in the order
+ * test-select, soft-reset, clock-control, then releases it in the reverse
+ * order with a 10us delay after each release step. Installed as the
+ * .reset_xaui PHY operation.
+ */
+static void tenxpress_reset_xaui(struct efx_nic *efx)
+{
+ int phy = efx->mii.phy_id;
+ int clk_ctrl, test_select, soft_rst2;
+
+ /* The real work is done on clock_ctrl; the other resets are thought
+ * to be optional but make the reset more reliable
+ */
+
+ /* Read */
+ clk_ctrl = mdio_clause45_read(efx, phy, MDIO_MMD_PCS,
+ PCS_CLOCK_CTRL_REG);
+ test_select = mdio_clause45_read(efx, phy, MDIO_MMD_PCS,
+ PCS_TEST_SELECT_REG);
+ soft_rst2 = mdio_clause45_read(efx, phy, MDIO_MMD_PCS,
+ PCS_SOFT_RST2_REG);
+
+ /* Put in reset */
+ /* (the *_RST_N bits are cleared to assert reset and set to release) */
+ test_select &= ~(1 << CLK312_EN_LBN);
+ mdio_clause45_write(efx, phy, MDIO_MMD_PCS,
+ PCS_TEST_SELECT_REG, test_select);
+
+ soft_rst2 &= ~((1 << XGXS_RST_N_LBN) | (1 << SERDES_RST_N_LBN));
+ mdio_clause45_write(efx, phy, MDIO_MMD_PCS,
+ PCS_SOFT_RST2_REG, soft_rst2);
+
+ clk_ctrl &= ~(1 << PLL312_RST_N_LBN);
+ mdio_clause45_write(efx, phy, MDIO_MMD_PCS,
+ PCS_CLOCK_CTRL_REG, clk_ctrl);
+ udelay(10);
+
+ /* Remove reset */
+ clk_ctrl |= (1 << PLL312_RST_N_LBN);
+ mdio_clause45_write(efx, phy, MDIO_MMD_PCS,
+ PCS_CLOCK_CTRL_REG, clk_ctrl);
+ udelay(10);
+
+ soft_rst2 |= ((1 << XGXS_RST_N_LBN) | (1 << SERDES_RST_N_LBN));
+ mdio_clause45_write(efx, phy, MDIO_MMD_PCS,
+ PCS_SOFT_RST2_REG, soft_rst2);
+ udelay(10);
+
+ test_select |= (1 << CLK312_EN_LBN);
+ mdio_clause45_write(efx, phy, MDIO_MMD_PCS,
+ PCS_TEST_SELECT_REG, test_select);
+ udelay(10);
+}
+
+/* PHY operations table for the 10Xpress PHY, wiring the handlers defined in
+ * this file into the generic efx_phy_operations interface. .mmds lists the
+ * MMDs this driver expects to find in the package. */
+struct efx_phy_operations falcon_tenxpress_phy_ops = {
+ .init = tenxpress_phy_init,
+ .reconfigure = tenxpress_phy_reconfigure,
+ .check_hw = tenxpress_phy_check_hw,
+ .fini = tenxpress_phy_fini,
+ .clear_interrupt = tenxpress_phy_clear_interrupt,
+ .reset_xaui = tenxpress_reset_xaui,
+ .mmds = TENXPRESS_REQUIRED_DEVS,
+};
diff --git a/drivers/net/sfc/tx.c b/drivers/net/sfc/tx.c
new file mode 100644
index 00000000000..fbb866b2185
--- /dev/null
+++ b/drivers/net/sfc/tx.c
@@ -0,0 +1,452 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2005-2008 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/pci.h>
+#include <linux/tcp.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/highmem.h>
+#include "net_driver.h"
+#include "tx.h"
+#include "efx.h"
+#include "falcon.h"
+#include "workarounds.h"
+
+/*
+ * TX descriptor ring full threshold
+ *
+ * The tx_queue descriptor ring fill-level must fall below this value
+ * before we restart the netif queue. The value is half the ring mask.
+ *
+ * The argument is parenthesised so that any pointer expression, not just a
+ * plain identifier, can be passed safely.
+ */
+#define EFX_NETDEV_TX_THRESHOLD(_tx_queue)	\
+	((_tx_queue)->efx->type->txd_ring_mask / 2u)
+
+/* Stop the netif TX queue.
+ * Stops nest: each channel can hold its own stop on the queue, so every call
+ * bumps netif_stop_count, and efx_wake_queue() only restarts the queue when
+ * the count drops back to zero.
+ */
+void efx_stop_queue(struct efx_nic *efx)
+{
+	struct net_device *net_dev = efx->net_dev;
+
+	spin_lock_bh(&efx->netif_stop_lock);
+
+	EFX_TRACE(efx, "stop TX queue\n");
+	atomic_inc(&efx->netif_stop_count);
+	netif_stop_queue(net_dev);
+
+	spin_unlock_bh(&efx->netif_stop_lock);
+}
+
+/* Release one stop on the netif TX queue.
+ * Stops nest (see efx_stop_queue()), so the queue is only woken when this
+ * call drops netif_stop_count to zero.
+ */
+inline void efx_wake_queue(struct efx_nic *efx)
+{
+	int last_stop;
+
+	local_bh_disable();
+
+	/* Takes netif_stop_lock only if the count reached zero */
+	last_stop = atomic_dec_and_lock(&efx->netif_stop_count,
+					&efx->netif_stop_lock);
+	if (last_stop) {
+		EFX_TRACE(efx, "waking TX queue\n");
+		netif_wake_queue(efx->net_dev);
+		spin_unlock(&efx->netif_stop_lock);
+	}
+
+	local_bh_enable();
+}
+
+/* Release the resources attached to one TX buffer.
+ *
+ * Unmaps the DMA mapping owned by @buffer (if any), using the unmap call
+ * that matches how it was mapped, and frees the skb owned by @buffer (if
+ * any). @buffer must be an element of tx_queue->buffer[]: its index in that
+ * array is what the trace message reports.
+ */
+static inline void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
+				      struct efx_tx_buffer *buffer)
+{
+	if (buffer->unmap_len) {
+		struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
+		if (buffer->unmap_single)
+			pci_unmap_single(pci_dev, buffer->unmap_addr,
+					 buffer->unmap_len, PCI_DMA_TODEVICE);
+		else
+			pci_unmap_page(pci_dev, buffer->unmap_addr,
+				       buffer->unmap_len, PCI_DMA_TODEVICE);
+		buffer->unmap_len = 0;
+		buffer->unmap_single = 0;
+	}
+
+	if (buffer->skb) {
+		dev_kfree_skb_any((struct sk_buff *) buffer->skb);
+		buffer->skb = NULL;
+		/* There is no read_ptr in this scope; derive the ring index
+		 * from the buffer's position in the software ring so this
+		 * still compiles when tracing is enabled. */
+		EFX_TRACE(tx_queue->efx, "TX queue %d transmission id %x "
+			  "complete\n", tx_queue->queue,
+			  (unsigned int)(buffer - tx_queue->buffer));
+	}
+}
+
+
+/*
+ * Add a socket buffer to a TX queue
+ *
+ * This maps all fragments of a socket buffer for DMA and adds them to
+ * the TX queue. The queue's insert pointer will be incremented by
+ * the number of fragments in the socket buffer.
+ *
+ * If any DMA mapping fails, any mapped fragments will be unmapped,
+ * the queue's insert pointer will be restored to its original value.
+ * In that case the skb is freed here and NETDEV_TX_OK is returned, so the
+ * packet is dropped rather than retried.
+ *
+ * If the ring fills up part-way through, everything queued so far for this
+ * skb is unwound, the netif queue is stopped and NETDEV_TX_BUSY is returned.
+ *
+ * Returns NETDEV_TX_OK or NETDEV_TX_BUSY
+ * You must hold netif_tx_lock() to call this function.
+ */
+static inline int efx_enqueue_skb(struct efx_tx_queue *tx_queue,
+				  const struct sk_buff *skb)
+{
+	struct efx_nic *efx = tx_queue->efx;
+	struct pci_dev *pci_dev = efx->pci_dev;
+	struct efx_tx_buffer *buffer;
+	skb_frag_t *fragment;
+	struct page *page;
+	int page_offset;
+	unsigned int len, unmap_len = 0, fill_level, insert_ptr, misalign;
+	dma_addr_t dma_addr, unmap_addr = 0;
+	unsigned int dma_len;
+	unsigned unmap_single;
+	int q_space, i = 0;
+	int rc = NETDEV_TX_OK;
+
+	EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);
+
+	/* Get size of the initial fragment */
+	len = skb_headlen(skb);
+
+	fill_level = tx_queue->insert_count - tx_queue->old_read_count;
+	q_space = efx->type->txd_ring_mask - 1 - fill_level;
+
+	/* Map for DMA.  Use pci_map_single rather than pci_map_page
+	 * since this is more efficient on machines with sparse
+	 * memory.
+	 */
+	unmap_single = 1;
+	dma_addr = pci_map_single(pci_dev, skb->data, len, PCI_DMA_TODEVICE);
+
+	/* Process all fragments */
+	while (1) {
+		if (unlikely(pci_dma_mapping_error(dma_addr)))
+			goto pci_err;
+
+		/* Store fields for marking in the per-fragment final
+		 * descriptor */
+		unmap_len = len;
+		unmap_addr = dma_addr;
+
+		/* Add to TX queue, splitting across DMA boundaries */
+		do {
+			if (unlikely(q_space-- <= 0)) {
+				/* It might be that completions have
+				 * happened since the xmit path last
+				 * checked.  Update the xmit path's
+				 * copy of read_count.
+				 */
+				++tx_queue->stopped;
+				/* This memory barrier protects the
+				 * change of stopped from the access
+				 * of read_count. */
+				smp_mb();
+				tx_queue->old_read_count =
+					*(volatile unsigned *)
+					&tx_queue->read_count;
+				fill_level = (tx_queue->insert_count
+					      - tx_queue->old_read_count);
+				q_space = (efx->type->txd_ring_mask - 1 -
+					   fill_level);
+				if (unlikely(q_space-- <= 0))
+					goto stop;
+				smp_mb();
+				--tx_queue->stopped;
+			}
+
+			insert_ptr = (tx_queue->insert_count &
+				      efx->type->txd_ring_mask);
+			buffer = &tx_queue->buffer[insert_ptr];
+			EFX_BUG_ON_PARANOID(buffer->skb);
+			EFX_BUG_ON_PARANOID(buffer->len);
+			EFX_BUG_ON_PARANOID(buffer->continuation != 1);
+			EFX_BUG_ON_PARANOID(buffer->unmap_len);
+
+			/* Never let one descriptor cross a DMA boundary */
+			dma_len = (((~dma_addr) & efx->type->tx_dma_mask) + 1);
+			if (likely(dma_len > len))
+				dma_len = len;
+
+			/* Misaligned descriptors are capped so that offset
+			 * plus length stays within 512 bytes */
+			misalign = (unsigned)dma_addr & efx->type->bug5391_mask;
+			if (misalign && dma_len + misalign > 512)
+				dma_len = 512 - misalign;
+
+			/* Fill out per descriptor fields */
+			buffer->len = dma_len;
+			buffer->dma_addr = dma_addr;
+			len -= dma_len;
+			dma_addr += dma_len;
+			++tx_queue->insert_count;
+		} while (len);
+
+		/* Transfer ownership of the unmapping to the final buffer */
+		buffer->unmap_addr = unmap_addr;
+		buffer->unmap_single = unmap_single;
+		buffer->unmap_len = unmap_len;
+		unmap_len = 0;
+
+		/* Get address and size of next fragment */
+		if (i >= skb_shinfo(skb)->nr_frags)
+			break;
+		fragment = &skb_shinfo(skb)->frags[i];
+		len = fragment->size;
+		page = fragment->page;
+		page_offset = fragment->page_offset;
+		i++;
+		/* Map for DMA */
+		unmap_single = 0;
+		dma_addr = pci_map_page(pci_dev, page, page_offset, len,
+					PCI_DMA_TODEVICE);
+	}
+
+	/* Transfer ownership of the skb to the final buffer */
+	buffer->skb = skb;
+	buffer->continuation = 0;
+
+	/* Pass off to hardware */
+	falcon_push_buffers(tx_queue);
+
+	return NETDEV_TX_OK;
+
+ pci_err:
+	EFX_ERR_RL(efx, " TX queue %d could not map skb with %d bytes %d "
+		   "fragments for DMA\n", tx_queue->queue, skb->len,
+		   skb_shinfo(skb)->nr_frags + 1);
+
+	/* Mark the packet as transmitted, and free the SKB ourselves */
+	dev_kfree_skb_any((struct sk_buff *)skb);
+	goto unwind;
+
+ stop:
+	rc = NETDEV_TX_BUSY;
+
+	if (tx_queue->stopped == 1)
+		efx_stop_queue(efx);
+
+ unwind:
+	/* Work backwards until we hit the original insert pointer value */
+	while (tx_queue->insert_count != tx_queue->write_count) {
+		--tx_queue->insert_count;
+		insert_ptr = tx_queue->insert_count & efx->type->txd_ring_mask;
+		buffer = &tx_queue->buffer[insert_ptr];
+		efx_dequeue_buffer(tx_queue, buffer);
+		buffer->len = 0;
+	}
+
+	/* Free the fragment we were mid-way through pushing.  The skb head
+	 * is mapped with pci_map_single() and page fragments with
+	 * pci_map_page(), so use the unmap call that matches. */
+	if (unmap_len) {
+		if (unmap_single)
+			pci_unmap_single(pci_dev, unmap_addr, unmap_len,
+					 PCI_DMA_TODEVICE);
+		else
+			pci_unmap_page(pci_dev, unmap_addr, unmap_len,
+				       PCI_DMA_TODEVICE);
+	}
+
+	return rc;
+}
+
+/* Remove packets from the TX queue
+ *
+ * Releases every buffer from the current read pointer up to and including
+ * ring position @index. Hitting an empty descriptor on the way means the
+ * completion was spurious; a reset is scheduled and the walk stops.
+ */
+static inline void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
+				       unsigned int index)
+{
+	struct efx_nic *efx = tx_queue->efx;
+	unsigned int ring_mask = efx->type->txd_ring_mask;
+	unsigned int end_ptr = (index + 1) & ring_mask;
+	unsigned int slot;
+
+	for (slot = tx_queue->read_count & ring_mask;
+	     slot != end_ptr;
+	     slot = tx_queue->read_count & ring_mask) {
+		struct efx_tx_buffer *buffer = &tx_queue->buffer[slot];
+
+		if (unlikely(buffer->len == 0)) {
+			EFX_ERR(efx, "TX queue %d spurious TX "
+				"completion id %x\n", tx_queue->queue,
+				slot);
+			efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
+			return;
+		}
+
+		/* Release the slot and return it to its idle state */
+		efx_dequeue_buffer(tx_queue, buffer);
+		buffer->continuation = 1;
+		buffer->len = 0;
+
+		++tx_queue->read_count;
+	}
+}
+
+/* Initiate a packet transmission on the specified TX queue.
+ * Note that returning anything other than NETDEV_TX_OK will cause the
+ * OS to free the skb.
+ *
+ * This is kept separate from efx_hard_start_xmit so that the loopback test
+ * can direct packets via specific TX queues. It is a non-static inline so
+ * as not to penalise performance for non-loopback transmissions.
+ *
+ * Context: netif_tx_lock held
+ */
+inline int efx_xmit(struct efx_nic *efx,
+		    struct efx_tx_queue *tx_queue, struct sk_buff *skb)
+{
+	/* Map fragments for DMA and add to TX queue */
+	int rc = efx_enqueue_skb(tx_queue, skb);
+
+	/* Only a successfully queued packet updates the last TX timer */
+	if (likely(rc == NETDEV_TX_OK))
+		efx->net_dev->trans_start = jiffies;
+
+	return rc;
+}
+
+/* Initiate a packet transmission.  We use one channel per CPU
+ * (sharing when we have more CPUs than channels).  On Falcon, the TX
+ * completion events will be directed back to the CPU that transmitted
+ * the packet, which should be cache-efficient.
+ *
+ * This implementation always transmits on TX queue 0.
+ *
+ * Context: non-blocking.
+ * Note that returning anything other than NETDEV_TX_OK will cause the
+ * OS to free the skb.
+ */
+int efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
+{
+	struct efx_nic *efx = net_dev->priv;
+	struct efx_tx_queue *tx_queue = &efx->tx_queue[0];
+
+	return efx_xmit(efx, tx_queue, skb);
+}
+
+/* Handle a TX completion for ring position @index: release the completed
+ * buffers and, if this queue had stopped the netif queue and the ring has
+ * drained below the restart threshold, wake it again.
+ */
+void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
+{
+	struct efx_nic *efx = tx_queue->efx;
+	unsigned fill_level;
+
+	EFX_BUG_ON_PARANOID(index > efx->type->txd_ring_mask);
+
+	efx_dequeue_buffers(tx_queue, index);
+
+	/* This barrier separates the update of read_count from the test
+	 * of stopped. */
+	smp_mb();
+	if (likely(!tx_queue->stopped))
+		return;
+
+	/* Still too full to be worth restarting? */
+	fill_level = tx_queue->insert_count - tx_queue->read_count;
+	if (fill_level >= EFX_NETDEV_TX_THRESHOLD(tx_queue))
+		return;
+
+	EFX_BUG_ON_PARANOID(!NET_DEV_REGISTERED(efx));
+
+	/* Re-check under netif_tx_lock(), to avoid racing with
+	 * efx_xmit(). */
+	netif_tx_lock(efx->net_dev);
+	if (tx_queue->stopped) {
+		tx_queue->stopped = 0;
+		efx_wake_queue(efx);
+	}
+	netif_tx_unlock(efx->net_dev);
+}
+
+/* Create a TX queue: allocate the software ring, mark every slot as a
+ * continuation, then allocate the hardware ring.
+ *
+ * Returns 0 on success, -ENOMEM if the software ring cannot be allocated,
+ * or the error from falcon_probe_tx(). On failure the queue is marked
+ * unused and no software ring is left allocated.
+ */
+int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
+{
+	struct efx_nic *efx = tx_queue->efx;
+	unsigned int ring_bytes;
+	int rc = -ENOMEM;
+	int i;
+
+	EFX_LOG(efx, "creating TX queue %d\n", tx_queue->queue);
+
+	/* Software ring: one entry per descriptor */
+	ring_bytes = (efx->type->txd_ring_mask + 1) *
+		     sizeof(*tx_queue->buffer);
+	tx_queue->buffer = kzalloc(ring_bytes, GFP_KERNEL);
+	if (tx_queue->buffer) {
+		for (i = 0; i <= efx->type->txd_ring_mask; ++i)
+			tx_queue->buffer[i].continuation = 1;
+
+		/* Hardware ring */
+		rc = falcon_probe_tx(tx_queue);
+		if (!rc)
+			return 0;
+
+		kfree(tx_queue->buffer);
+		tx_queue->buffer = NULL;
+	}
+
+	tx_queue->used = 0;
+
+	return rc;
+}
+
+/* Initialise a TX queue: zero all ring counters, check that the queue is not
+ * marked stopped, then set up the hardware descriptor ring.
+ * Returns the result of falcon_init_tx().
+ */
+int efx_init_tx_queue(struct efx_tx_queue *tx_queue)
+{
+	EFX_LOG(tx_queue->efx, "initialising TX queue %d\n", tx_queue->queue);
+
+	/* Start with an empty ring */
+	tx_queue->old_read_count = 0;
+	tx_queue->read_count = 0;
+	tx_queue->write_count = 0;
+	tx_queue->insert_count = 0;
+
+	/* A queue being initialised must not hold a stop on the port */
+	BUG_ON(tx_queue->stopped);
+
+	/* Set up TX descriptor ring */
+	return falcon_init_tx(tx_queue);
+}
+
+/* Release every buffer still sitting in the ring between the read and write
+ * pointers. Does nothing if the software ring was never allocated.
+ */
+void efx_release_tx_buffers(struct efx_tx_queue *tx_queue)
+{
+	struct efx_tx_buffer *buffer;
+	unsigned int slot;
+
+	if (!tx_queue->buffer)
+		return;
+
+	for (; tx_queue->read_count != tx_queue->write_count;
+	     ++tx_queue->read_count) {
+		slot = tx_queue->read_count &
+		       tx_queue->efx->type->txd_ring_mask;
+		buffer = &tx_queue->buffer[slot];
+
+		/* Release the slot and return it to its idle state */
+		efx_dequeue_buffer(tx_queue, buffer);
+		buffer->continuation = 1;
+		buffer->len = 0;
+	}
+}
+
+/* Shut down a TX queue: flush and remove the hardware ring, release any
+ * buffers left behind, and drop this queue's stop on the port if it holds
+ * one.
+ */
+void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
+{
+	EFX_LOG(tx_queue->efx, "shutting down TX queue %d\n", tx_queue->queue);
+
+	/* Flush TX queue, remove descriptor ring */
+	falcon_fini_tx(tx_queue);
+
+	efx_release_tx_buffers(tx_queue);
+
+	/* Nothing more to do unless this queue stopped the port */
+	if (!tx_queue->stopped)
+		return;
+
+	tx_queue->stopped = 0;
+	efx_wake_queue(tx_queue->efx);
+}
+
+/* Destroy a TX queue: remove the hardware ring, free the software ring and
+ * mark the queue as unused.
+ */
+void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
+{
+	struct efx_nic *efx = tx_queue->efx;
+
+	EFX_LOG(efx, "destroying TX queue %d\n", tx_queue->queue);
+	falcon_remove_tx(tx_queue);
+
+	/* Drop the software ring and leave no stale pointer behind */
+	kfree(tx_queue->buffer);
+	tx_queue->used = 0;
+	tx_queue->buffer = NULL;
+}
+
+
diff --git a/drivers/net/sfc/tx.h b/drivers/net/sfc/tx.h
new file mode 100644
index 00000000000..1526a73b4b5
--- /dev/null
+++ b/drivers/net/sfc/tx.h
@@ -0,0 +1,24 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2006 Fen Systems Ltd.
+ * Copyright 2006-2008 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_TX_H
+#define EFX_TX_H
+
+#include "net_driver.h"
+
+/* TX queue lifecycle.  probe/remove bracket the lifetime of the software
+ * buffer array (remove frees it); init/fini bracket use of the TX
+ * descriptor ring (init sets it up, fini flushes and removes it). */
+int efx_probe_tx_queue(struct efx_tx_queue *tx_queue);
+void efx_remove_tx_queue(struct efx_tx_queue *tx_queue);
+int efx_init_tx_queue(struct efx_tx_queue *tx_queue);
+void efx_fini_tx_queue(struct efx_tx_queue *tx_queue);
+
+/* NOTE(review): presumably the net_device transmit entry point - the
+ * definition is not visible from this header; confirm in tx.c. */
+int efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev);
+/* Free any buffers left in the ring between read_count and write_count */
+void efx_release_tx_buffers(struct efx_tx_queue *tx_queue);
+
+#endif /* EFX_TX_H */
diff --git a/drivers/net/sfc/workarounds.h b/drivers/net/sfc/workarounds.h
new file mode 100644
index 00000000000..dca62f19019
--- /dev/null
+++ b/drivers/net/sfc/workarounds.h
@@ -0,0 +1,56 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2006-2008 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_WORKAROUNDS_H
+#define EFX_WORKAROUNDS_H
+
+/*
+ * Hardware workarounds.
+ * Bug numbers are from Solarflare's Bugzilla.
+ */
+
+/* Selector macros.  Each EFX_WORKAROUND_<bug> name below is an alias for
+ * one of these two function-like macros, so it is used as
+ * EFX_WORKAROUND_<bug>(efx) and evaluates to non-zero when the workaround
+ * applies to that NIC. */
+
+/* Applies unconditionally; the argument is ignored */
+#define EFX_WORKAROUND_ALWAYS(efx) 1
+/* Applies only when FALCON_REV(efx) is FALCON_REV_A1 or earlier */
+#define EFX_WORKAROUND_FALCON_A(efx) (FALCON_REV(efx) <= FALCON_REV_A1)
+
+/* --- Workarounds applied on every revision --- */
+
+/* XAUI resets if link not detected */
+#define EFX_WORKAROUND_5147 EFX_WORKAROUND_ALWAYS
+/* SNAP frames have TOBE_DISC set */
+#define EFX_WORKAROUND_5475 EFX_WORKAROUND_ALWAYS
+/* RX PCIe double split performance issue */
+#define EFX_WORKAROUND_7575 EFX_WORKAROUND_ALWAYS
+/* TX pkt parser problem with <= 16 byte TXes */
+#define EFX_WORKAROUND_9141 EFX_WORKAROUND_ALWAYS
+/* XGXS and XAUI reset sequencing in SW */
+#define EFX_WORKAROUND_9388 EFX_WORKAROUND_ALWAYS
+/* Low rate CRC errors require XAUI reset */
+#define EFX_WORKAROUND_10750 EFX_WORKAROUND_ALWAYS
+/* TX_EV_PKT_ERR can be caused by a dangling TX descriptor
+ * or a PCIe error (bug 11028) */
+#define EFX_WORKAROUND_10727 EFX_WORKAROUND_ALWAYS
+/* Transmit flow control may get disabled */
+#define EFX_WORKAROUND_11482 EFX_WORKAROUND_ALWAYS
+/* Flush events can take a very long time to appear */
+#define EFX_WORKAROUND_11557 EFX_WORKAROUND_ALWAYS
+
+/* --- Workarounds applied only up to Falcon revision A1 --- */
+
+/* Spurious parity errors in TSORT buffers */
+#define EFX_WORKAROUND_5129 EFX_WORKAROUND_FALCON_A
+/* iSCSI parsing errors */
+#define EFX_WORKAROUND_5583 EFX_WORKAROUND_FALCON_A
+/* RX events go missing */
+#define EFX_WORKAROUND_5676 EFX_WORKAROUND_FALCON_A
+/* RX_RESET on A1 */
+#define EFX_WORKAROUND_6555 EFX_WORKAROUND_FALCON_A
+/* Increase filter depth to avoid RX_RESET */
+#define EFX_WORKAROUND_7244 EFX_WORKAROUND_FALCON_A
+/* Flushes may never complete */
+#define EFX_WORKAROUND_7803 EFX_WORKAROUND_FALCON_A
+/* Leak overlength packets rather than free */
+#define EFX_WORKAROUND_8071 EFX_WORKAROUND_FALCON_A
+
+#endif /* EFX_WORKAROUNDS_H */
diff --git a/drivers/net/sfc/xenpack.h b/drivers/net/sfc/xenpack.h
new file mode 100644
index 00000000000..b0d1f225b70
--- /dev/null
+++ b/drivers/net/sfc/xenpack.h
@@ -0,0 +1,62 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2006 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_XENPACK_H
+#define EFX_XENPACK_H
+
+/* Exported functions from Xenpack standard PHY control */
+
+#include "mdio_10g.h"
+
+/****************************************************************************/
+/* XENPACK MDIO register extensions */
+/* LASI register addresses; the helpers in this header access them in the
+ * MDIO_MMD_PMAPMD device. */
+#define MDIO_XP_LASI_RX_CTRL (0x9000)
+#define MDIO_XP_LASI_TX_CTRL (0x9001)
+#define MDIO_XP_LASI_CTRL (0x9002)
+#define MDIO_XP_LASI_RX_STAT (0x9003)
+#define MDIO_XP_LASI_TX_STAT (0x9004)
+#define MDIO_XP_LASI_STAT (0x9005)
+
+/* Control/Status bits */
+/* XP_LASI_LS_ALARM is the bit xenpack_enable_lasi_irqs() writes to
+ * MDIO_XP_LASI_CTRL. */
+#define XP_LASI_LS_ALARM (1 << 0)
+#define XP_LASI_TX_ALARM (1 << 1)
+#define XP_LASI_RX_ALARM (1 << 2)
+/* These two are Quake vendor extensions to the standard XENPACK defines */
+#define XP_LASI_LS_INTB (1 << 3)
+#define XP_LASI_TEST (1 << 7)
+
+/* Enable LASI interrupts for PHY.
+ *
+ * MDIO_XP_LASI_STAT is read first purely for its side effect (the read
+ * clears the status register); the value is not needed.  The control
+ * register is then written with XP_LASI_LS_ALARM as the only bit set.
+ */
+static inline void xenpack_enable_lasi_irqs(struct efx_nic *efx)
+{
+	int phy_id = efx->mii.phy_id;
+
+	/* Read to clear LASI status register; the result is discarded */
+	mdio_clause45_read(efx, phy_id, MDIO_MMD_PMAPMD,
+			   MDIO_XP_LASI_STAT);
+
+	mdio_clause45_write(efx, phy_id, MDIO_MMD_PMAPMD,
+			    MDIO_XP_LASI_CTRL, XP_LASI_LS_ALARM);
+}
+
+/* Acknowledge a LASI interrupt.
+ *
+ * Reading MDIO_XP_LASI_STAT clears the link status alarm; the value that
+ * was read is returned to the caller.
+ */
+static inline int xenpack_clear_lasi_irqs(struct efx_nic *efx)
+{
+	int phy_id = efx->mii.phy_id;
+	int lasi_stat;
+
+	lasi_stat = mdio_clause45_read(efx, phy_id, MDIO_MMD_PMAPMD,
+				       MDIO_XP_LASI_STAT);
+
+	return lasi_stat;
+}
+
+/* Turn off LASI interrupts by writing zero to MDIO_XP_LASI_CTRL */
+static inline void xenpack_disable_lasi_irqs(struct efx_nic *efx)
+{
+	int phy_id = efx->mii.phy_id;
+
+	mdio_clause45_write(efx, phy_id, MDIO_MMD_PMAPMD,
+			    MDIO_XP_LASI_CTRL, 0);
+}
+
+#endif /* EFX_XENPACK_H */
diff --git a/drivers/net/sfc/xfp_phy.c b/drivers/net/sfc/xfp_phy.c
new file mode 100644
index 00000000000..66dd5bf1eaa
--- /dev/null
+++ b/drivers/net/sfc/xfp_phy.c
@@ -0,0 +1,132 @@
+/****************************************************************************
+ * Driver for Solarflare Solarstorm network controllers and boards
+ * Copyright 2006-2008 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+/*
+ * Driver for XFP optical PHYs (plus some support specific to the Quake 2032)
+ * See www.amcc.com for details (search for qt2032)
+ */
+
+#include <linux/timer.h>
+#include <linux/delay.h>
+#include "efx.h"
+#include "gmii.h"
+#include "mdio_10g.h"
+#include "xenpack.h"
+#include "phy.h"
+#include "mac.h"
+
+/* MMDs this PHY driver relies on.  The mask is used for the post-reset
+ * presence check, for the link-status query, and as the .mmds value in
+ * falcon_xfp_phy_ops. */
+#define XFP_REQUIRED_DEVS (MDIO_MMDREG_DEVS0_PCS | \
+ MDIO_MMDREG_DEVS0_PMAPMD | \
+ MDIO_MMDREG_DEVS0_PHYXS)
+
+/****************************************************************************/
+/* Quake-specific MDIO registers */
+/* Base LED register; xfp_set_led() adds the LED index to this address */
+#define MDIO_QUAKE_LED0_REG (0xD006)
+
+/* Write @mode to LED register number @led of the PHY.
+ *
+ * The register address is MDIO_QUAKE_LED0_REG plus @led, accessed in the
+ * MDIO_MMD_PMAPMD device.
+ */
+void xfp_set_led(struct efx_nic *p, int led, int mode)
+{
+	mdio_clause45_write(p, p->mii.phy_id, MDIO_MMD_PMAPMD,
+			    MDIO_QUAKE_LED0_REG + led, mode);
+}
+
+/* Reset timing, passed to mdio_clause45_reset_mmd() as
+ * (XFP_MAX_RESET_TIME / XFP_RESET_WAIT, XFP_RESET_WAIT).
+ * NOTE(review): presumably a poll count and a per-poll wait in
+ * milliseconds - confirm against mdio_clause45_reset_mmd(). */
+#define XFP_MAX_RESET_TIME 500
+#define XFP_RESET_WAIT 10
+
+/* Reset the PHYXS MMD. This is documented (for the Quake PHY) as doing
+ * a complete soft reset.
+ *
+ * After the reset the function waits for the PHY to boot, verifies that
+ * the required MMDs respond, and re-initialises the board LEDs.
+ *
+ * Returns a negative value if either the reset or the MMD check fails
+ * (each failure is logged with its own message), otherwise the
+ * non-negative result of the MMD check.
+ */
+static int xfp_reset_phy(struct efx_nic *efx)
+{
+	int rc;
+
+	rc = mdio_clause45_reset_mmd(efx, MDIO_MMD_PHYXS,
+				     XFP_MAX_RESET_TIME / XFP_RESET_WAIT,
+				     XFP_RESET_WAIT);
+	if (rc < 0) {
+		EFX_ERR(efx, "XFP: reset timed out!\n");
+		return rc;
+	}
+
+	/* Wait 250ms for the PHY to complete bootup */
+	msleep(250);
+
+	/* Check that all the MMDs we expect are present and responding. We
+	 * expect faults on some if the link is down, but not on the PHY XS */
+	rc = mdio_clause45_check_mmds(efx, XFP_REQUIRED_DEVS,
+				      MDIO_MMDREG_DEVS0_PHYXS);
+	if (rc < 0) {
+		/* The reset itself succeeded, so do not report a timeout */
+		EFX_ERR(efx, "XFP: MMD check failed after reset\n");
+		return rc;
+	}
+
+	efx->board_info.init_leds(efx);
+
+	return rc;
+}
+
+/* PHY init hook.
+ *
+ * Logs the device ID read from the PHY XS MMD, resets the PHY through
+ * xfp_reset_phy() and logs whether that succeeded.
+ *
+ * Returns the result of xfp_reset_phy().
+ */
+static int xfp_phy_init(struct efx_nic *efx)
+{
+	u32 devid;
+	int rc;
+
+	devid = mdio_clause45_read_id(efx, MDIO_MMD_PHYXS);
+	EFX_INFO(efx, "XFP: PHY ID reg %x (OUI %x model %x revision"
+		 " %x)\n", devid, MDIO_ID_OUI(devid), MDIO_ID_MODEL(devid),
+		 MDIO_ID_REV(devid));
+
+	rc = xfp_reset_phy(efx);
+	EFX_INFO(efx, "XFP: PHY init %s.\n",
+		 rc == 0 ? "successful" : "failed");
+
+	return rc;
+}
+
+/* clear_interrupt hook: clear the LASI interrupt by reading its status
+ * register.  The status value itself is not used here. */
+static void xfp_phy_clear_interrupt(struct efx_nic *efx)
+{
+	(void) xenpack_clear_lasi_irqs(efx);
+}
+
+/* Report the link state over all of XFP_REQUIRED_DEVS, as determined by
+ * mdio_clause45_links_ok(). */
+static int xfp_link_ok(struct efx_nic *efx)
+{
+	int ok = mdio_clause45_links_ok(efx, XFP_REQUIRED_DEVS);
+
+	return ok;
+}
+
+/* check_hw hook.
+ *
+ * Compares the current link state with the cached efx->link_up and, if
+ * they differ, raises a simulated PHY event.  Always returns 0.
+ */
+static int xfp_phy_check_hw(struct efx_nic *efx)
+{
+	/* Simulate a PHY event if link state has changed */
+	if (xfp_link_ok(efx) != efx->link_up)
+		falcon_xmac_sim_phy_event(efx);
+
+	return 0;
+}
+
+/* reconfigure hook: refresh efx->link_up from the current link state and
+ * set efx->link_options to the fixed value GM_LPA_10000FULL. */
+static void xfp_phy_reconfigure(struct efx_nic *efx)
+{
+	int link_ok = xfp_link_ok(efx);
+
+	efx->link_up = link_ok;
+	efx->link_options = GM_LPA_10000FULL;
+}
+
+
+/* fini hook: the only teardown performed is calling the board's blink
+ * handler with 0. */
+static void xfp_phy_fini(struct efx_nic *efx)
+{
+	/* Clobber the LED if it was blinking */
+	efx->board_info.blink(efx, 0);
+}
+
+/* PHY operations table for XFP PHYs.  reset_xaui is wired to the shared
+ * efx_port_dummy_op_void handler. */
+struct efx_phy_operations falcon_xfp_phy_ops = {
+	/* Bring-up and teardown */
+	.init = xfp_phy_init,
+	.fini = xfp_phy_fini,
+
+	/* Link and interrupt handling */
+	.reconfigure = xfp_phy_reconfigure,
+	.check_hw = xfp_phy_check_hw,
+	.clear_interrupt = xfp_phy_clear_interrupt,
+	.reset_xaui = efx_port_dummy_op_void,
+
+	/* MMDs required by this PHY */
+	.mmds = XFP_REQUIRED_DEVS,
+};