hns_roce_hem.c 38 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
/*
 * Copyright (c) 2016 Hisilicon Limited.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/platform_device.h>
#include "hns_roce_device.h"
#include "hns_roce_hem.h"
#include "hns_roce_common.h"

39 40 41 42 43 44 45 46 47 48
/* Bits of hns_roce_hem_index.inited, one per index field of that struct. */
#define HEM_INDEX_BUF			BIT(0)
#define HEM_INDEX_L0			BIT(1)
#define HEM_INDEX_L1			BIT(2)
/*
 * Positions of one object inside a multi-hop HEM table: the slot in
 * table->hem[], table->bt_l0[] and table->bt_l1[], plus a bitmask of
 * HEM_INDEX_* flags recording which of those slots the current operation
 * has set up (alloc path) or should tear down (free path).
 */
struct hns_roce_hem_index {
	u64 buf; /* slot in table->hem[] */
	u64 l0; /* slot in table->bt_l0[] / table->bt_l0_dma_addr[] */
	u64 l1; /* slot in table->bt_l1[] / table->bt_l1_dma_addr[] */
	u32 inited; /* indicate which index is available */
};

49 50
/*
 * Tell whether HEM tables of the given type use multi-hop addressing.
 *
 * The answer is the "hop number" capability of that table type being
 * non-zero. Table types that are not listed here are never multi-hop.
 */
bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type)
{
	int hops;

	switch (type) {
	case HEM_TYPE_QPC:
		hops = hr_dev->caps.qpc_hop_num;
		break;
	case HEM_TYPE_MTPT:
		hops = hr_dev->caps.mpt_hop_num;
		break;
	case HEM_TYPE_CQC:
		hops = hr_dev->caps.cqc_hop_num;
		break;
	case HEM_TYPE_SRQC:
		hops = hr_dev->caps.srqc_hop_num;
		break;
	case HEM_TYPE_SCCC:
		hops = hr_dev->caps.sccc_hop_num;
		break;
	case HEM_TYPE_QPC_TIMER:
		hops = hr_dev->caps.qpc_timer_hop_num;
		break;
	case HEM_TYPE_CQC_TIMER:
		hops = hr_dev->caps.cqc_timer_hop_num;
		break;
	case HEM_TYPE_GMV:
		hops = hr_dev->caps.gmv_hop_num;
		break;
	default:
		return false;
	}

	return hops != 0;
}

85 86
/*
 * Check whether @hem_idx is the only populated entry of its group.
 *
 * A group is the run of @bt_chunk_num consecutive entries of @hem that
 * contains @hem_idx (aligned down to a multiple of @bt_chunk_num), clipped
 * to @hem_max_num entries. Returns false as soon as any other entry of the
 * group is non-NULL, true otherwise.
 */
static bool hns_roce_check_hem_null(struct hns_roce_hem **hem, u64 hem_idx,
				    u32 bt_chunk_num, u64 hem_max_num)
{
	u64 first = round_down(hem_idx, bt_chunk_num);
	u64 limit = first + bt_chunk_num;
	u64 pos;

	/* never look past the end of the hem array */
	if (limit > hem_max_num)
		limit = hem_max_num;

	for (pos = first; pos < limit; pos++) {
		if (pos == hem_idx)
			continue;

		if (hem[pos])
			return false;
	}

	return true;
}

99
/*
 * Check whether @ba_idx is the only populated entry of its group in @bt.
 *
 * A group is the run of @bt_chunk_num consecutive entries that contains
 * @ba_idx (aligned down to a multiple of @bt_chunk_num). Returns false if
 * any other entry of the group is non-NULL, true otherwise.
 *
 * The entry to skip is compared by its absolute position. Comparing the
 * group-relative offset with @ba_idx only works for the first group; for
 * any later group the caller's own entry would be counted as "in use" and
 * the function could never return true.
 *
 * NOTE(review): no upper bound is applied here, so the caller must make
 * sure the whole group lies inside the @bt array.
 */
static bool hns_roce_check_bt_null(u64 **bt, u64 ba_idx, u32 bt_chunk_num)
{
	u64 start_idx = round_down(ba_idx, bt_chunk_num);
	u64 end_idx = start_idx + bt_chunk_num;
	u64 i;

	for (i = start_idx; i < end_idx; i++)
		if (i != ba_idx && bt[i])
			return false;

	return true;
}

/*
 * Return the number of base address table levels (3, 2 or 1) used by a
 * table of @table_type configured with @hop_num, or 0 when none of the
 * check_whether_bt_num_*() predicates matches.
 */
static int hns_roce_get_bt_num(u32 table_type, u32 hop_num)
{
	if (check_whether_bt_num_3(table_type, hop_num))
		return 3;

	if (check_whether_bt_num_2(table_type, hop_num))
		return 2;

	if (check_whether_bt_num_1(table_type, hop_num))
		return 1;

	return 0;
}

123 124 125
/*
 * Fill @cfg from the four capability fields sharing the prefix @pfx:
 * <pfx>_buf_pg_sz, <pfx>_ba_pg_sz, <pfx>_bt_num and <pfx>_hop_num.
 * Chunk sizes are 2^(pg_sz + PAGE_SHIFT) bytes.
 */
#define HEM_CFG_FROM_CAPS(cfg, hrdev, pfx)				\
	do {								\
		(cfg)->buf_chunk_size =					\
			1 << ((hrdev)->caps.pfx##_buf_pg_sz + PAGE_SHIFT); \
		(cfg)->bt_chunk_size =					\
			1 << ((hrdev)->caps.pfx##_ba_pg_sz + PAGE_SHIFT); \
		(cfg)->ba_l0_num = (hrdev)->caps.pfx##_bt_num;		\
		(cfg)->hop_num = (hrdev)->caps.pfx##_hop_num;		\
	} while (0)

/*
 * Load the multi-hop configuration (buffer chunk size, BT chunk size,
 * number of L0 base addresses and hop number) of table @type from the
 * device capabilities into @mhop.
 *
 * Returns 0 on success, -EINVAL (after logging) for a table type that has
 * no multi-hop configuration.
 */
static int get_hem_table_config(struct hns_roce_dev *hr_dev,
				struct hns_roce_hem_mhop *mhop,
				u32 type)
{
	struct device *dev = hr_dev->dev;

	switch (type) {
	case HEM_TYPE_QPC:
		HEM_CFG_FROM_CAPS(mhop, hr_dev, qpc);
		break;
	case HEM_TYPE_MTPT:
		HEM_CFG_FROM_CAPS(mhop, hr_dev, mpt);
		break;
	case HEM_TYPE_CQC:
		HEM_CFG_FROM_CAPS(mhop, hr_dev, cqc);
		break;
	case HEM_TYPE_SCCC:
		HEM_CFG_FROM_CAPS(mhop, hr_dev, sccc);
		break;
	case HEM_TYPE_QPC_TIMER:
		HEM_CFG_FROM_CAPS(mhop, hr_dev, qpc_timer);
		break;
	case HEM_TYPE_CQC_TIMER:
		HEM_CFG_FROM_CAPS(mhop, hr_dev, cqc_timer);
		break;
	case HEM_TYPE_SRQC:
		HEM_CFG_FROM_CAPS(mhop, hr_dev, srqc);
		break;
	case HEM_TYPE_GMV:
		HEM_CFG_FROM_CAPS(mhop, hr_dev, gmv);
		break;
	default:
		dev_err(dev, "table %u not support multi-hop addressing!\n",
			type);
		return -EINVAL;
	}

	return 0;
}

#undef HEM_CFG_FROM_CAPS

int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
			   struct hns_roce_hem_table *table, unsigned long *obj,
			   struct hns_roce_hem_mhop *mhop)
{
	struct device *dev = hr_dev->dev;
	u32 chunk_ba_num;
209
	u32 chunk_size;
210 211 212 213 214 215
	u32 table_idx;
	u32 bt_num;

	if (get_hem_table_config(hr_dev, mhop, table->type))
		return -EINVAL;

216 217 218
	if (!obj)
		return 0;

219
	/*
220
	 * QPC/MTPT/CQC/SRQC/SCCC alloc hem for buffer pages.
221 222
	 * MTT/CQE alloc hem for bt pages.
	 */
223
	bt_num = hns_roce_get_bt_num(table->type, mhop->hop_num);
224
	chunk_ba_num = mhop->bt_chunk_size / BA_BYTE_LEN;
225 226
	chunk_size = table->type < HEM_TYPE_MTT ? mhop->buf_chunk_size :
			      mhop->bt_chunk_size;
227 228 229 230 231 232
	table_idx = (*obj & (table->num_obj - 1)) /
		     (chunk_size / table->obj_size);
	switch (bt_num) {
	case 3:
		mhop->l2_idx = table_idx & (chunk_ba_num - 1);
		mhop->l1_idx = table_idx / chunk_ba_num & (chunk_ba_num - 1);
233
		mhop->l0_idx = (table_idx / chunk_ba_num) / chunk_ba_num;
234 235 236 237 238 239 240 241 242
		break;
	case 2:
		mhop->l1_idx = table_idx & (chunk_ba_num - 1);
		mhop->l0_idx = table_idx / chunk_ba_num;
		break;
	case 1:
		mhop->l0_idx = table_idx;
		break;
	default:
243 244
		dev_err(dev, "table %u not support hop_num = %u!\n",
			table->type, mhop->hop_num);
245 246 247 248 249 250 251 252 253 254 255 256
		return -EINVAL;
	}
	if (mhop->l0_idx >= mhop->ba_l0_num)
		mhop->l0_idx %= mhop->ba_l0_num;

	return 0;
}

/*
 * Allocate a HEM made of DMA-coherent memory covering @npages pages.
 *
 * @hr_dev:         device the memory is allocated for
 * @npages:         number of pages to cover
 * @hem_alloc_size: size in bytes that determines the initial allocation
 *                  order
 * @gfp_mask:       allocation flags; __GFP_HIGHMEM is not expected here
 *                  (it triggers a WARN) and is stripped, together with
 *                  __GFP_NOWARN, for the bookkeeping kmalloc() calls
 *
 * The memory is recorded in chunks of at most HNS_ROCE_HEM_CHUNK_LEN
 * pieces each, linked on hem->chunk_list. The caller owns the result and
 * releases it with hns_roce_free_hem(); the refcount is left for the
 * caller to initialise.
 *
 * Returns the new HEM, or NULL if any allocation fails (everything
 * allocated so far is released).
 */
static struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev,
					       int npages,
					       unsigned long hem_alloc_size,
					       gfp_t gfp_mask)
{
	struct hns_roce_hem_chunk *chunk = NULL;
	struct hns_roce_hem *hem;
	struct scatterlist *mem;
	int order;
	void *buf;

	WARN_ON(gfp_mask & __GFP_HIGHMEM);

	hem = kmalloc(sizeof(*hem),
		      gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
	if (!hem)
		return NULL;

	INIT_LIST_HEAD(&hem->chunk_list);

	order = get_order(hem_alloc_size);

	while (npages > 0) {
		if (!chunk) {
			chunk = kmalloc(sizeof(*chunk),
				gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
			if (!chunk)
				goto fail;

			sg_init_table(chunk->mem, HNS_ROCE_HEM_CHUNK_LEN);
			chunk->npages = 0;
			chunk->nsg = 0;
			memset(chunk->buf, 0, sizeof(chunk->buf));
			list_add_tail(&chunk->list, &hem->chunk_list);
		}

		/* never allocate more than what is still needed */
		while (1 << order > npages)
			--order;

		/*
		 * Alloc memory one time. If failed, don't alloc small block
		 * memory, directly return fail.
		 */
		mem = &chunk->mem[chunk->npages];
		buf = dma_alloc_coherent(hr_dev->dev, PAGE_SIZE << order,
				&sg_dma_address(mem), gfp_mask);
		if (!buf)
			goto fail;

		chunk->buf[chunk->npages] = buf;
		sg_dma_len(mem) = PAGE_SIZE << order;

		++chunk->npages;
		++chunk->nsg;
		npages -= 1 << order;

		/*
		 * chunk->mem holds HNS_ROCE_HEM_CHUNK_LEN entries; once they
		 * are all used, continue in a fresh chunk instead of writing
		 * past the end of the array.
		 */
		if (chunk->npages == HNS_ROCE_HEM_CHUNK_LEN)
			chunk = NULL;
	}

	return hem;

fail:
	/* npages of every chunk counts only successful allocations */
	hns_roce_free_hem(hr_dev, hem);
	return NULL;
}

/*
 * Release a HEM: free every DMA buffer recorded in each chunk, then the
 * chunks themselves and finally the HEM descriptor. A NULL @hem is ignored.
 */
void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem)
{
	struct hns_roce_hem_chunk *cur, *next;
	struct scatterlist *sg;
	int idx;

	if (!hem)
		return;

	list_for_each_entry_safe(cur, next, &hem->chunk_list, list) {
		for (idx = 0; idx < cur->npages; ++idx) {
			sg = &cur->mem[idx];
			dma_free_coherent(hr_dev->dev, sg_dma_len(sg),
					  cur->buf[idx], sg_dma_address(sg));
		}

		kfree(cur);
	}

	kfree(hem);
}

337 338 339 340
static int calc_hem_config(struct hns_roce_dev *hr_dev,
			   struct hns_roce_hem_table *table, unsigned long obj,
			   struct hns_roce_hem_mhop *mhop,
			   struct hns_roce_hem_index *index)
341
{
342 343 344
	struct ib_device *ibdev = &hr_dev->ib_dev;
	unsigned long mhop_obj = obj;
	u32 l0_idx, l1_idx, l2_idx;
345 346 347 348
	u32 chunk_ba_num;
	u32 bt_num;
	int ret;

349
	ret = hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, mhop);
350 351 352
	if (ret)
		return ret;

353 354 355 356 357
	l0_idx = mhop->l0_idx;
	l1_idx = mhop->l1_idx;
	l2_idx = mhop->l2_idx;
	chunk_ba_num = mhop->bt_chunk_size / BA_BYTE_LEN;
	bt_num = hns_roce_get_bt_num(table->type, mhop->hop_num);
358 359
	switch (bt_num) {
	case 3:
360 361 362 363
		index->l1 = l0_idx * chunk_ba_num + l1_idx;
		index->l0 = l0_idx;
		index->buf = l0_idx * chunk_ba_num * chunk_ba_num +
			     l1_idx * chunk_ba_num + l2_idx;
364 365
		break;
	case 2:
366 367
		index->l0 = l0_idx;
		index->buf = l0_idx * chunk_ba_num + l1_idx;
368 369
		break;
	case 1:
370
		index->buf = l0_idx;
371 372
		break;
	default:
373
		ibdev_err(ibdev, "table %u not support mhop.hop_num = %u!\n",
374
			  table->type, mhop->hop_num);
375 376 377
		return -EINVAL;
	}

378
	if (unlikely(index->buf >= table->num_hem)) {
379
		ibdev_err(ibdev, "table %u exceed hem limt idx %llu, max %lu!\n",
380
			  table->type, index->buf, table->num_hem);
381 382 383
		return -EINVAL;
	}

384 385
	return 0;
}
386

387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409
static void free_mhop_hem(struct hns_roce_dev *hr_dev,
			  struct hns_roce_hem_table *table,
			  struct hns_roce_hem_mhop *mhop,
			  struct hns_roce_hem_index *index)
{
	u32 bt_size = mhop->bt_chunk_size;
	struct device *dev = hr_dev->dev;

	if (index->inited & HEM_INDEX_BUF) {
		hns_roce_free_hem(hr_dev, table->hem[index->buf]);
		table->hem[index->buf] = NULL;
	}

	if (index->inited & HEM_INDEX_L1) {
		dma_free_coherent(dev, bt_size, table->bt_l1[index->l1],
				  table->bt_l1_dma_addr[index->l1]);
		table->bt_l1[index->l1] = NULL;
	}

	if (index->inited & HEM_INDEX_L0) {
		dma_free_coherent(dev, bt_size, table->bt_l0[index->l0],
				  table->bt_l0_dma_addr[index->l0]);
		table->bt_l0[index->l0] = NULL;
410
	}
411 412 413 414 415 416 417 418 419 420 421 422 423 424
}

static int alloc_mhop_hem(struct hns_roce_dev *hr_dev,
			  struct hns_roce_hem_table *table,
			  struct hns_roce_hem_mhop *mhop,
			  struct hns_roce_hem_index *index)
{
	u32 bt_size = mhop->bt_chunk_size;
	struct device *dev = hr_dev->dev;
	struct hns_roce_hem_iter iter;
	gfp_t flag;
	u64 bt_ba;
	u32 size;
	int ret;
425 426

	/* alloc L1 BA's chunk */
427 428 429 430 431
	if ((check_whether_bt_num_3(table->type, mhop->hop_num) ||
	     check_whether_bt_num_2(table->type, mhop->hop_num)) &&
	     !table->bt_l0[index->l0]) {
		table->bt_l0[index->l0] = dma_alloc_coherent(dev, bt_size,
					    &table->bt_l0_dma_addr[index->l0],
432
					    GFP_KERNEL);
433
		if (!table->bt_l0[index->l0]) {
434 435 436
			ret = -ENOMEM;
			goto out;
		}
437
		index->inited |= HEM_INDEX_L0;
438 439 440
	}

	/* alloc L2 BA's chunk */
441 442 443 444
	if (check_whether_bt_num_3(table->type, mhop->hop_num) &&
	    !table->bt_l1[index->l1])  {
		table->bt_l1[index->l1] = dma_alloc_coherent(dev, bt_size,
					    &table->bt_l1_dma_addr[index->l1],
445
					    GFP_KERNEL);
446
		if (!table->bt_l1[index->l1]) {
447
			ret = -ENOMEM;
448
			goto err_alloc_hem;
449
		}
450 451 452
		index->inited |= HEM_INDEX_L1;
		*(table->bt_l0[index->l0] + mhop->l1_idx) =
					       table->bt_l1_dma_addr[index->l1];
453 454
	}

455
	/*
456
	 * alloc buffer space chunk for QPC/MTPT/CQC/SRQC/SCCC.
457 458
	 * alloc bt space chunk for MTT/CQE.
	 */
459 460 461 462 463
	size = table->type < HEM_TYPE_MTT ? mhop->buf_chunk_size : bt_size;
	flag = (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) | __GFP_NOWARN;
	table->hem[index->buf] = hns_roce_alloc_hem(hr_dev, size >> PAGE_SHIFT,
						    size, flag);
	if (!table->hem[index->buf]) {
464
		ret = -ENOMEM;
465
		goto err_alloc_hem;
466 467
	}

468 469
	index->inited |= HEM_INDEX_BUF;
	hns_roce_hem_first(table->hem[index->buf], &iter);
470 471
	bt_ba = hns_roce_hem_addr(&iter);
	if (table->type < HEM_TYPE_MTT) {
472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493
		if (mhop->hop_num == 2)
			*(table->bt_l1[index->l1] + mhop->l2_idx) = bt_ba;
		else if (mhop->hop_num == 1)
			*(table->bt_l0[index->l0] + mhop->l1_idx) = bt_ba;
	} else if (mhop->hop_num == 2) {
		*(table->bt_l0[index->l0] + mhop->l1_idx) = bt_ba;
	}

	return 0;
err_alloc_hem:
	free_mhop_hem(hr_dev, table, mhop, index);
out:
	return ret;
}

static int set_mhop_hem(struct hns_roce_dev *hr_dev,
			struct hns_roce_hem_table *table, unsigned long obj,
			struct hns_roce_hem_mhop *mhop,
			struct hns_roce_hem_index *index)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	int step_idx;
494
	int ret = 0;
495 496 497 498 499 500

	if (index->inited & HEM_INDEX_L0) {
		ret = hr_dev->hw->set_hem(hr_dev, table, obj, 0);
		if (ret) {
			ibdev_err(ibdev, "set HEM step 0 failed!\n");
			goto out;
501
		}
502
	}
503

504 505 506 507 508
	if (index->inited & HEM_INDEX_L1) {
		ret = hr_dev->hw->set_hem(hr_dev, table, obj, 1);
		if (ret) {
			ibdev_err(ibdev, "set HEM step 1 failed!\n");
			goto out;
509 510 511
		}
	}

512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532
	if (index->inited & HEM_INDEX_BUF) {
		if (mhop->hop_num == HNS_ROCE_HOP_NUM_0)
			step_idx = 0;
		else
			step_idx = mhop->hop_num;
		ret = hr_dev->hw->set_hem(hr_dev, table, obj, step_idx);
		if (ret)
			ibdev_err(ibdev, "set HEM step last failed!\n");
	}
out:
	return ret;
}

/*
 * Take a reference on the HEM backing @obj in a multi-hop table, creating
 * it on first use.
 *
 * Under table->mutex: an existing HEM only has its refcount bumped.
 * Otherwise the missing levels are allocated, programmed into the hardware
 * for table types below HEM_TYPE_MTT, and the refcount starts at 1. If
 * programming fails, everything allocated by this call is freed again.
 *
 * Returns 0 on success or a negative errno.
 */
static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,
				   struct hns_roce_hem_table *table,
				   unsigned long obj)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	struct hns_roce_hem_index index = {};
	struct hns_roce_hem_mhop mhop = {};
	int ret;

	ret = calc_hem_config(hr_dev, table, obj, &mhop, &index);
	if (ret) {
		ibdev_err(ibdev, "calc hem config failed!\n");
		return ret;
	}

	mutex_lock(&table->mutex);

	if (table->hem[index.buf]) {
		refcount_inc(&table->hem[index.buf]->refcount);
		goto unlock;
	}

	ret = alloc_mhop_hem(hr_dev, table, &mhop, &index);
	if (ret) {
		ibdev_err(ibdev, "alloc mhop hem failed!\n");
		goto unlock;
	}

	/* set HEM base address to hardware */
	if (table->type < HEM_TYPE_MTT) {
		ret = set_mhop_hem(hr_dev, table, obj, &mhop, &index);
		if (ret) {
			ibdev_err(ibdev, "set HEM address to HW failed!\n");
			free_mhop_hem(hr_dev, table, &mhop, &index);
			goto unlock;
		}
	}

	refcount_set(&table->hem[index.buf]->refcount, 1);

unlock:
	mutex_unlock(&table->mutex);
	return ret;
}

571 572 573
int hns_roce_table_get(struct hns_roce_dev *hr_dev,
		       struct hns_roce_hem_table *table, unsigned long obj)
{
574
	struct device *dev = hr_dev->dev;
575
	unsigned long i;
576
	int ret = 0;
577

578 579 580
	if (hns_roce_check_whether_mhop(hr_dev, table->type))
		return hns_roce_table_mhop_get(hr_dev, table, obj);

581
	i = (obj & (table->num_obj - 1)) / (table->table_chunk_size /
582 583 584 585 586
	     table->obj_size);

	mutex_lock(&table->mutex);

	if (table->hem[i]) {
587
		refcount_inc(&table->hem[i]->refcount);
588 589 590 591
		goto out;
	}

	table->hem[i] = hns_roce_alloc_hem(hr_dev,
592 593
				       table->table_chunk_size >> PAGE_SHIFT,
				       table->table_chunk_size,
594 595 596 597 598 599 600 601
				       (table->lowmem ? GFP_KERNEL :
					GFP_HIGHUSER) | __GFP_NOWARN);
	if (!table->hem[i]) {
		ret = -ENOMEM;
		goto out;
	}

	/* Set HEM base address(128K/page, pa) to Hardware */
602
	if (hr_dev->hw->set_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT)) {
603 604
		hns_roce_free_hem(hr_dev, table->hem[i]);
		table->hem[i] = NULL;
605 606 607 608 609
		ret = -ENODEV;
		dev_err(dev, "set HEM base address to HW failed.\n");
		goto out;
	}

610
	refcount_set(&table->hem[i]->refcount, 1);
611 612 613 614 615
out:
	mutex_unlock(&table->mutex);
	return ret;
}

616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648
static void clear_mhop_hem(struct hns_roce_dev *hr_dev,
			   struct hns_roce_hem_table *table, unsigned long obj,
			   struct hns_roce_hem_mhop *mhop,
			   struct hns_roce_hem_index *index)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	u32 hop_num = mhop->hop_num;
	u32 chunk_ba_num;
	int step_idx;

	index->inited = HEM_INDEX_BUF;
	chunk_ba_num = mhop->bt_chunk_size / BA_BYTE_LEN;
	if (check_whether_bt_num_2(table->type, hop_num)) {
		if (hns_roce_check_hem_null(table->hem, index->buf,
					    chunk_ba_num, table->num_hem))
			index->inited |= HEM_INDEX_L0;
	} else if (check_whether_bt_num_3(table->type, hop_num)) {
		if (hns_roce_check_hem_null(table->hem, index->buf,
					    chunk_ba_num, table->num_hem)) {
			index->inited |= HEM_INDEX_L1;
			if (hns_roce_check_bt_null(table->bt_l1, index->l1,
						   chunk_ba_num))
				index->inited |= HEM_INDEX_L0;
		}
	}

	if (table->type < HEM_TYPE_MTT) {
		if (hop_num == HNS_ROCE_HOP_NUM_0)
			step_idx = 0;
		else
			step_idx = hop_num;

		if (hr_dev->hw->clear_hem(hr_dev, table, obj, step_idx))
649
			ibdev_warn(ibdev, "failed to clear hop%u HEM.\n", hop_num);
650 651 652

		if (index->inited & HEM_INDEX_L1)
			if (hr_dev->hw->clear_hem(hr_dev, table, obj, 1))
653
				ibdev_warn(ibdev, "failed to clear HEM step 1.\n");
654 655 656

		if (index->inited & HEM_INDEX_L0)
			if (hr_dev->hw->clear_hem(hr_dev, table, obj, 0))
657
				ibdev_warn(ibdev, "failed to clear HEM step 0.\n");
658 659 660
	}
}

661 662 663 664
static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev,
				    struct hns_roce_hem_table *table,
				    unsigned long obj,
				    int check_refcount)
665
{
666 667 668
	struct ib_device *ibdev = &hr_dev->ib_dev;
	struct hns_roce_hem_index index = {};
	struct hns_roce_hem_mhop mhop = {};
669 670
	int ret;

671 672 673
	ret = calc_hem_config(hr_dev, table, obj, &mhop, &index);
	if (ret) {
		ibdev_err(ibdev, "calc hem config failed!\n");
674 675 676
		return;
	}

677 678 679 680
	if (!check_refcount)
		mutex_lock(&table->mutex);
	else if (!refcount_dec_and_mutex_lock(&table->hem[index.buf]->refcount,
					      &table->mutex))
681 682
		return;

683 684
	clear_mhop_hem(hr_dev, table, obj, &mhop, &index);
	free_mhop_hem(hr_dev, table, &mhop, &index);
685 686 687 688

	mutex_unlock(&table->mutex);
}

689 690 691
void hns_roce_table_put(struct hns_roce_dev *hr_dev,
			struct hns_roce_hem_table *table, unsigned long obj)
{
692
	struct device *dev = hr_dev->dev;
693 694
	unsigned long i;

695 696 697 698 699
	if (hns_roce_check_whether_mhop(hr_dev, table->type)) {
		hns_roce_table_mhop_put(hr_dev, table, obj, 1);
		return;
	}

700
	i = (obj & (table->num_obj - 1)) /
701
	    (table->table_chunk_size / table->obj_size);
702

703 704 705
	if (!refcount_dec_and_mutex_lock(&table->hem[i]->refcount,
					 &table->mutex))
		return;
706

707
	if (hr_dev->hw->clear_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT))
708
		dev_warn(dev, "failed to clear HEM base address.\n");
709

710 711
	hns_roce_free_hem(hr_dev, table->hem[i]);
	table->hem[i] = NULL;
712 713 714 715

	mutex_unlock(&table->mutex);
}

716 717 718
/*
 * Look up the kernel virtual address of object @obj inside @table.
 *
 * @hr_dev:     device owning the table
 * @table:      HEM table to search; only tables with lowmem set are
 *              supported
 * @obj:        object number
 * @dma_handle: if not NULL, receives the DMA address matching the
 *              returned pointer
 *
 * The HEM holding the object and the byte offset inside it are computed
 * first (differently for direct and multi-hop tables), then the HEM's
 * chunk list is walked until the piece containing that offset is found.
 *
 * Returns the address, or NULL if the table is not lowmem, the multi-hop
 * calculation fails, or the HEM has not been allocated.
 */
void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
			  struct hns_roce_hem_table *table,
			  unsigned long obj, dma_addr_t *dma_handle)
{
	struct hns_roce_hem_chunk *chunk;
	/*
	 * Zero-initialised like in the other callers:
	 * hns_roce_calc_hem_mhop() only writes the indices of the levels in
	 * use, while l1_idx is read unconditionally below.
	 */
	struct hns_roce_hem_mhop mhop = {};
	struct hns_roce_hem *hem;
	unsigned long mhop_obj = obj;
	unsigned long obj_per_chunk;
	unsigned long idx_offset;
	int offset, dma_offset;
	void *addr = NULL;
	u32 hem_idx = 0;
	int length;
	int i, j;

	if (!table->lowmem)
		return NULL;

	mutex_lock(&table->mutex);

	if (!hns_roce_check_whether_mhop(hr_dev, table->type)) {
		obj_per_chunk = table->table_chunk_size / table->obj_size;
		hem = table->hem[(obj & (table->num_obj - 1)) / obj_per_chunk];
		idx_offset = (obj & (table->num_obj - 1)) % obj_per_chunk;
		dma_offset = offset = idx_offset * table->obj_size;
	} else {
		u32 seg_size = 64; /* 8 bytes per BA and 8 BA per segment */

		if (hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop))
			goto out;
		/* mtt mhop */
		i = mhop.l0_idx;
		j = mhop.l1_idx;
		if (mhop.hop_num == 2)
			hem_idx = i * (mhop.bt_chunk_size / BA_BYTE_LEN) + j;
		else if (mhop.hop_num == 1 ||
			 mhop.hop_num == HNS_ROCE_HOP_NUM_0)
			hem_idx = i;

		hem = table->hem[hem_idx];
		dma_offset = offset = (obj & (table->num_obj - 1)) * seg_size %
				       mhop.bt_chunk_size;
		if (mhop.hop_num == 2)
			dma_offset = offset = 0;
	}

	if (!hem)
		goto out;

	/* walk the pieces, consuming the offset until it fits in one */
	list_for_each_entry(chunk, &hem->chunk_list, list) {
		for (i = 0; i < chunk->npages; ++i) {
			length = sg_dma_len(&chunk->mem[i]);
			if (dma_handle && dma_offset >= 0) {
				if (length > (u32)dma_offset)
					*dma_handle = sg_dma_address(
						&chunk->mem[i]) + dma_offset;
				dma_offset -= length;
			}

			if (length > (u32)offset) {
				addr = chunk->buf[i] + offset;
				goto out;
			}
			offset -= length;
		}
	}

out:
	mutex_unlock(&table->mutex);
	return addr;
}

/*
 * Set up the bookkeeping of a HEM table; no HEM memory is allocated yet.
 *
 * @hr_dev:     device owning the table
 * @table:      table to initialise
 * @type:       HEM_TYPE_* of the table
 * @obj_size:   size of one object in bytes
 * @nobj:       number of objects the table must hold
 * @use_lowmem: stored in table->lowmem
 *
 * A direct table only gets its hem[] pointer array. A multi-hop table
 * additionally gets the L0 pointer/DMA-address arrays (2 or 3 BT levels)
 * and the L1 pointer/DMA-address arrays (3 BT levels).
 *
 * Returns 0 on success, -EINVAL if the multi-hop configuration cannot be
 * read, or -ENOMEM (with the arrays freed again and reset to NULL).
 */
int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
			    struct hns_roce_hem_table *table, u32 type,
			    unsigned long obj_size, unsigned long nobj,
			    int use_lowmem)
{
	struct hns_roce_hem_mhop mhop = {};
	unsigned long obj_per_chunk;
	unsigned long bt_chunk_num;
	unsigned long num_bt_l0;
	unsigned long num_bt_l1;
	unsigned long num_hem;
	bool need_l0;
	bool need_l1;

	if (!hns_roce_check_whether_mhop(hr_dev, type)) {
		table->table_chunk_size = hr_dev->caps.chunk_sz;
		obj_per_chunk = table->table_chunk_size / obj_size;
		num_hem = (nobj + obj_per_chunk - 1) / obj_per_chunk;

		table->hem = kcalloc(num_hem, sizeof(*table->hem), GFP_KERNEL);
		if (!table->hem)
			return -ENOMEM;
	} else {
		if (get_hem_table_config(hr_dev, &mhop, type))
			return -EINVAL;

		obj_per_chunk = mhop.buf_chunk_size / obj_size;
		num_hem = (nobj + obj_per_chunk - 1) / obj_per_chunk;
		bt_chunk_num = mhop.bt_chunk_size / BA_BYTE_LEN;

		/* MTT and later types use one full BT chunk of L0 entries */
		if (type >= HEM_TYPE_MTT)
			num_bt_l0 = bt_chunk_num;
		else
			num_bt_l0 = mhop.ba_l0_num;

		need_l1 = check_whether_bt_num_3(type, mhop.hop_num);
		need_l0 = check_whether_bt_num_2(type, mhop.hop_num) ||
			  need_l1;

		table->hem = kcalloc(num_hem, sizeof(*table->hem),
				     GFP_KERNEL);
		if (!table->hem)
			return -ENOMEM;

		if (need_l1) {
			num_bt_l1 = (num_hem + bt_chunk_num - 1) /
				    bt_chunk_num;

			table->bt_l1 = kcalloc(num_bt_l1,
					       sizeof(*table->bt_l1),
					       GFP_KERNEL);
			if (!table->bt_l1)
				goto err_free_hem;

			table->bt_l1_dma_addr =
				kcalloc(num_bt_l1,
					sizeof(*table->bt_l1_dma_addr),
					GFP_KERNEL);
			if (!table->bt_l1_dma_addr)
				goto err_free_bt_l1;
		}

		if (need_l0) {
			table->bt_l0 = kcalloc(num_bt_l0,
					       sizeof(*table->bt_l0),
					       GFP_KERNEL);
			if (!table->bt_l0)
				goto err_free_l1_dma;

			table->bt_l0_dma_addr =
				kcalloc(num_bt_l0,
					sizeof(*table->bt_l0_dma_addr),
					GFP_KERNEL);
			if (!table->bt_l0_dma_addr)
				goto err_free_bt_l0;
		}
	}

	table->type = type;
	table->num_hem = num_hem;
	table->num_obj = nobj;
	table->obj_size = obj_size;
	table->lowmem = use_lowmem;
	mutex_init(&table->mutex);

	return 0;

err_free_bt_l0:
	kfree(table->bt_l0);
	table->bt_l0 = NULL;

err_free_l1_dma:
	kfree(table->bt_l1_dma_addr);
	table->bt_l1_dma_addr = NULL;

err_free_bt_l1:
	kfree(table->bt_l1);
	table->bt_l1 = NULL;

err_free_hem:
	kfree(table->hem);
	table->hem = NULL;

	return -ENOMEM;
}

895 896
static void hns_roce_cleanup_mhop_hem_table(struct hns_roce_dev *hr_dev,
					    struct hns_roce_hem_table *table)
897 898 899 900
{
	struct hns_roce_hem_mhop mhop;
	u32 buf_chunk_size;
	u64 obj;
901
	int i;
902

903 904
	if (hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop))
		return;
905 906
	buf_chunk_size = table->type < HEM_TYPE_MTT ? mhop.buf_chunk_size :
					mhop.bt_chunk_size;
907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923

	for (i = 0; i < table->num_hem; ++i) {
		obj = i * buf_chunk_size / table->obj_size;
		if (table->hem[i])
			hns_roce_table_mhop_put(hr_dev, table, obj, 0);
	}

	kfree(table->hem);
	table->hem = NULL;
	kfree(table->bt_l1);
	table->bt_l1 = NULL;
	kfree(table->bt_l1_dma_addr);
	table->bt_l1_dma_addr = NULL;
	kfree(table->bt_l0);
	table->bt_l0 = NULL;
	kfree(table->bt_l0_dma_addr);
	table->bt_l0_dma_addr = NULL;
924 925 926 927 928
}

void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev,
				struct hns_roce_hem_table *table)
{
929
	struct device *dev = hr_dev->dev;
930 931
	unsigned long i;

932 933 934 935 936
	if (hns_roce_check_whether_mhop(hr_dev, table->type)) {
		hns_roce_cleanup_mhop_hem_table(hr_dev, table);
		return;
	}

937 938
	for (i = 0; i < table->num_hem; ++i)
		if (table->hem[i]) {
939
			if (hr_dev->hw->clear_hem(hr_dev, table,
940
			    i * table->table_chunk_size / table->obj_size, 0))
941 942 943 944 945 946 947 948 949 950
				dev_err(dev, "Clear HEM base address failed.\n");

			hns_roce_free_hem(hr_dev, table->hem[i]);
		}

	kfree(table->hem);
}

void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev)
{
951
	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
952 953
		hns_roce_cleanup_hem_table(hr_dev,
					   &hr_dev->srq_table.table);
954
	hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table);
955 956 957 958 959 960
	if (hr_dev->caps.qpc_timer_entry_sz)
		hns_roce_cleanup_hem_table(hr_dev,
					   &hr_dev->qpc_timer_table);
	if (hr_dev->caps.cqc_timer_entry_sz)
		hns_roce_cleanup_hem_table(hr_dev,
					   &hr_dev->cqc_timer_table);
961
	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL)
962 963
		hns_roce_cleanup_hem_table(hr_dev,
					   &hr_dev->qp_table.sccc_table);
964 965 966
	if (hr_dev->caps.trrl_entry_sz)
		hns_roce_cleanup_hem_table(hr_dev,
					   &hr_dev->qp_table.trrl_table);
967 968 969 970

	if (hr_dev->caps.gmv_entry_sz)
		hns_roce_cleanup_hem_table(hr_dev, &hr_dev->gmv_table);

971
	hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table);
972 973 974
	hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table);
	hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table);
}
975

976
struct hns_roce_hem_item {
977 978 979 980 981 982 983 984 985
	struct list_head list; /* link all hems in the same bt level */
	struct list_head sibling; /* link all hems in last hop for mtt */
	void *addr;
	dma_addr_t dma_addr;
	size_t count; /* max ba numbers */
	int start; /* start buf offset in this hem */
	int end; /* end buf offset in this hem */
};

986 987 988 989 990 991 992 993 994 995
/*
 * All HEM items are linked in a tree structure.
 *
 * NOTE(review): the users of this struct are outside the visible part of
 * the file; the field roles below are inferred from the names only.
 */
struct hns_roce_hem_head {
	/* presumably one list of intermediate items per BT region - confirm */
	struct list_head branch[HNS_ROCE_MAX_BT_REGION];
	/* presumably the top-level (root) items - confirm */
	struct list_head root;
	/* presumably the bottom-level (leaf) items - confirm */
	struct list_head leaf;
};

/*
 * Create a HEM list item covering buffer offsets @start..@end with room
 * for @count base addresses.
 *
 * When @exist_bt is true a DMA-coherent table of @count * BA_BYTE_LEN
 * bytes is allocated for the item; otherwise addr/dma_addr stay zeroed.
 * @bt_level is not used here.
 *
 * Returns the new item with both list heads initialised, or NULL on
 * allocation failure.
 */
static struct hns_roce_hem_item *
hem_list_alloc_item(struct hns_roce_dev *hr_dev, int start, int end, int count,
		    bool exist_bt, int bt_level)
{
	struct hns_roce_hem_item *item;

	item = kzalloc(sizeof(*item), GFP_KERNEL);
	if (!item)
		return NULL;

	if (!exist_bt)
		goto fill;

	item->addr = dma_alloc_coherent(hr_dev->dev, count * BA_BYTE_LEN,
					&item->dma_addr, GFP_KERNEL);
	if (!item->addr) {
		kfree(item);
		return NULL;
	}

fill:
	INIT_LIST_HEAD(&item->list);
	INIT_LIST_HEAD(&item->sibling);
	item->start = start;
	item->end = end;
	item->count = count;

	return item;
}

/*
 * Free a HEM list item. When @exist_bt is true the item owns a
 * DMA-coherent table of hem->count * BA_BYTE_LEN bytes, which is released
 * first.
 */
static void hem_list_free_item(struct hns_roce_dev *hr_dev,
			       struct hns_roce_hem_item *hem, bool exist_bt)
{
	if (exist_bt) {
		size_t bt_bytes = hem->count * BA_BYTE_LEN;

		dma_free_coherent(hr_dev->dev, bt_bytes, hem->addr,
				  hem->dma_addr);
	}

	kfree(hem);
}

/*
 * Unlink and free every item linked on @head through its "list" member.
 * @exist_bt is passed on to hem_list_free_item() for each of them.
 */
static void hem_list_free_all(struct hns_roce_dev *hr_dev,
			      struct list_head *head, bool exist_bt)
{
	struct hns_roce_hem_item *item, *next;

	/* _safe variant: the current item is removed while iterating */
	list_for_each_entry_safe(item, next, head, list) {
		list_del(&item->list);
		hem_list_free_item(hr_dev, item, exist_bt);
	}
}

/*
 * Store @table_addr as a 64-bit base address in the table slot that
 * @base_addr points at. @hr_dev is not used.
 */
static void hem_list_link_bt(struct hns_roce_dev *hr_dev, void *base_addr,
			     u64 table_addr)
{
	u64 *slot = base_addr;

	*slot = table_addr;
}

/* assign L0 table address to hem from root bt */
static void hem_list_assign_bt(struct hns_roce_dev *hr_dev,
1049
			       struct hns_roce_hem_item *hem, void *cpu_addr,
1050 1051 1052 1053 1054 1055
			       u64 phy_addr)
{
	hem->addr = cpu_addr;
	hem->dma_addr = (dma_addr_t)phy_addr;
}

static inline bool hem_list_page_is_in_range(struct hns_roce_hem_item *hem,
1057 1058 1059 1060 1061
					     int offset)
{
	return (hem->start <= offset && offset <= hem->end);
}

static struct hns_roce_hem_item *hem_list_search_item(struct list_head *ba_list,
						      int page_offset)
1064
{
1065 1066
	struct hns_roce_hem_item *hem, *temp_hem;
	struct hns_roce_hem_item *found = NULL;
1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155

	list_for_each_entry_safe(hem, temp_hem, ba_list, list) {
		if (hem_list_page_is_in_range(hem, page_offset)) {
			found = hem;
			break;
		}
	}

	return found;
}

/*
 * Tell whether @bt_level is at or below the last table level used for
 * @hopnum.
 *
 * hopnum    base address table levels
 * 0		L0(buf)
 * 1		L0 -> buf
 * 2		L0 -> L1 -> buf
 * 3		L0 -> L1 -> L2 -> buf
 *
 * The last level is 0 for hopnum 0 and hopnum - 1 otherwise.
 */
static bool hem_list_is_bottom_bt(int hopnum, int bt_level)
{
	int bottom_level = hopnum;

	if (hopnum)
		bottom_level = hopnum - 1;

	return bt_level >= bottom_level;
}

/**
 * hem_list_calc_ba_range - calc base address entries num
 * @hopnum: num of multihop addressing
 * @bt_level: base address table level
 * @unit: ba entries per bt page
 *
 * Return: @unit raised to the power (@hopnum - @bt_level), or 0 when
 * @hopnum <= @bt_level.
 *
 * hopnum  bt_level   range
 * 1	      0       unit
 * ------------
 * 2	      0       unit * unit
 * 2	      1       unit
 * ------------
 * 3	      0       unit * unit * unit
 * 3	      1       unit * unit
 * 3	      2       unit
 */
static u32 hem_list_calc_ba_range(int hopnum, int bt_level, int unit)
{
	u32 range = 1;
	int remaining;

	if (hopnum <= bt_level)
		return 0;

	/* one multiplication per table level between bt_level and the buf */
	for (remaining = hopnum - bt_level; remaining > 0; remaining--)
		range = range * unit;

	return range;
}

/**
 * calc the root ba entries which could cover all regions
 * @regions: buf region array
 * @region_cnt: array size of @regions
 * @unit: ba entries per bt page
 */
int hns_roce_hem_list_calc_root_ba(const struct hns_roce_buf_region *regions,
				   int region_cnt, int unit)
{
	struct hns_roce_buf_region *r;
	int total = 0;
	int step;
	int i;

	for (i = 0; i < region_cnt; i++) {
		r = (struct hns_roce_buf_region *)&regions[i];
		if (r->hopnum > 1) {
			step = hem_list_calc_ba_range(r->hopnum, 1, unit);
			if (step > 0)
				total += (r->count + step - 1) / step;
		} else {
			total += r->count;
		}
	}

	return total;
}

static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev,
				 const struct hns_roce_buf_region *r, int unit,
				 int offset, struct list_head *mid_bt,
				 struct list_head *btm_bt)
{
1156
	struct hns_roce_hem_item *hem_ptrs[HNS_ROCE_MAX_BT_LEVEL] = { NULL };
1157
	struct list_head temp_list[HNS_ROCE_MAX_BT_LEVEL];
1158
	struct hns_roce_hem_item *cur, *pre;
1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176
	const int hopnum = r->hopnum;
	int start_aligned;
	int distance;
	int ret = 0;
	int max_ofs;
	int level;
	u32 step;
	int end;

	if (hopnum <= 1)
		return 0;

	if (hopnum > HNS_ROCE_MAX_BT_LEVEL) {
		dev_err(hr_dev->dev, "invalid hopnum %d!\n", hopnum);
		return -EINVAL;
	}

	if (offset < r->offset) {
1177
		dev_err(hr_dev->dev, "invalid offset %d, min %u!\n",
1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235
			offset, r->offset);
		return -EINVAL;
	}

	distance = offset - r->offset;
	max_ofs = r->offset + r->count - 1;
	for (level = 0; level < hopnum; level++)
		INIT_LIST_HEAD(&temp_list[level]);

	/* config L1 bt to last bt and link them to corresponding parent */
	for (level = 1; level < hopnum; level++) {
		cur = hem_list_search_item(&mid_bt[level], offset);
		if (cur) {
			hem_ptrs[level] = cur;
			continue;
		}

		step = hem_list_calc_ba_range(hopnum, level, unit);
		if (step < 1) {
			ret = -EINVAL;
			goto err_exit;
		}

		start_aligned = (distance / step) * step + r->offset;
		end = min_t(int, start_aligned + step - 1, max_ofs);
		cur = hem_list_alloc_item(hr_dev, start_aligned, end, unit,
					  true, level);
		if (!cur) {
			ret = -ENOMEM;
			goto err_exit;
		}
		hem_ptrs[level] = cur;
		list_add(&cur->list, &temp_list[level]);
		if (hem_list_is_bottom_bt(hopnum, level))
			list_add(&cur->sibling, &temp_list[0]);

		/* link bt to parent bt */
		if (level > 1) {
			pre = hem_ptrs[level - 1];
			step = (cur->start - pre->start) / step * BA_BYTE_LEN;
			hem_list_link_bt(hr_dev, pre->addr + step,
					 cur->dma_addr);
		}
	}

	list_splice(&temp_list[0], btm_bt);
	for (level = 1; level < hopnum; level++)
		list_splice(&temp_list[level], &mid_bt[level]);

	return 0;

err_exit:
	for (level = 1; level < hopnum; level++)
		hem_list_free_all(hr_dev, &temp_list[level], true);

	return ret;
}

static struct hns_roce_hem_item *
alloc_root_hem(struct hns_roce_dev *hr_dev, int unit, int *max_ba_num,
	       const struct hns_roce_buf_region *regions, int region_cnt)
1239 1240
{
	const struct hns_roce_buf_region *r;
1241
	struct hns_roce_hem_item *hem;
1242
	int ba_num;
1243 1244
	int offset;

1245 1246
	ba_num = hns_roce_hem_list_calc_root_ba(regions, region_cnt, unit);
	if (ba_num < 1)
1247
		return ERR_PTR(-ENOMEM);
1248

1249
	if (ba_num > unit)
1250
		return ERR_PTR(-ENOBUFS);
1251

1252
	offset = regions[0].offset;
1253 1254
	/* indicate to last region */
	r = &regions[region_cnt - 1];
1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274
	hem = hem_list_alloc_item(hr_dev, offset, r->offset + r->count - 1,
				  ba_num, true, 0);
	if (!hem)
		return ERR_PTR(-ENOMEM);

	*max_ba_num = ba_num;

	return hem;
}

/*
 * Create a HEM item for region @r that owns no table of its own: it is
 * pointed at @cpu_base / @phy_base instead, then queued on @branch_head
 * (via 'list') and on @leaf_head (via 'sibling').
 *
 * Returns r->count on success or -ENOMEM if the item cannot be allocated.
 */
static int alloc_fake_root_bt(struct hns_roce_dev *hr_dev, void *cpu_base,
			      u64 phy_base, const struct hns_roce_buf_region *r,
			      struct list_head *branch_head,
			      struct list_head *leaf_head)
{
	struct hns_roce_hem_item *fake;

	/* exist_bt is false: no DMA memory is allocated for this item */
	fake = hem_list_alloc_item(hr_dev, r->offset,
				   r->offset + r->count - 1, r->count, false,
				   0);
	if (!fake)
		return -ENOMEM;

	hem_list_assign_bt(hr_dev, fake, cpu_base, phy_base);
	list_add(&fake->list, branch_head);
	list_add(&fake->sibling, leaf_head);

	return r->count;
}

/*
 * Write the DMA address of every item on @branch_head into the table at
 * @cpu_base. The slot of an item is its start page relative to the region,
 * divided by the range hem_list_calc_ba_range() reports for level 1.
 *
 * Returns the number of items linked, or -EINVAL when that range is
 * below 1.
 */
static int setup_middle_bt(struct hns_roce_dev *hr_dev, void *cpu_base,
			   int unit, const struct hns_roce_buf_region *r,
			   const struct list_head *branch_head)
{
	struct hns_roce_hem_item *item, *next;
	int linked = 0;
	int slot_off;
	int span;

	span = hem_list_calc_ba_range(r->hopnum, 1, unit);
	if (span < 1)
		return -EINVAL;

	/* if exist mid bt, link L1 to L0 */
	list_for_each_entry_safe(item, next, branch_head, list) {
		slot_off = (item->start - r->offset) / span * BA_BYTE_LEN;
		hem_list_link_bt(hr_dev, cpu_base + slot_off, item->dma_addr);
		linked++;
	}

	return linked;
}

static int
setup_root_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list,
	       int unit, int max_ba_num, struct hns_roce_hem_head *head,
	       const struct hns_roce_buf_region *regions, int region_cnt)
{
	const struct hns_roce_buf_region *r;
	struct hns_roce_hem_item *root_hem;
	void *cpu_base;
	u64 phy_base;
	int i, total;
	int ret;

	root_hem = list_first_entry(&head->root,
				    struct hns_roce_hem_item, list);
	if (!root_hem)
		return -ENOMEM;
1323 1324

	total = 0;
1325
	for (i = 0; i < region_cnt && total < max_ba_num; i++) {
1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336
		r = &regions[i];
		if (!r->count)
			continue;

		/* all regions's mid[x][0] shared the root_bt's trunk */
		cpu_base = root_hem->addr + total * BA_BYTE_LEN;
		phy_base = root_hem->dma_addr + total * BA_BYTE_LEN;

		/* if hopnum is 0 or 1, cut a new fake hem from the root bt
		 * which's address share to all regions.
		 */
1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347
		if (hem_list_is_bottom_bt(r->hopnum, 0))
			ret = alloc_fake_root_bt(hr_dev, cpu_base, phy_base, r,
						 &head->branch[i], &head->leaf);
		else
			ret = setup_middle_bt(hr_dev, cpu_base, unit, r,
					      &hem_list->mid_bt[i][1]);

		if (ret < 0)
			return ret;

		total += ret;
1348 1349
	}

1350 1351
	list_splice(&head->leaf, &hem_list->btm_bt);
	list_splice(&head->root, &hem_list->root_bt);
1352
	for (i = 0; i < region_cnt; i++)
1353
		list_splice(&head->branch[i], &hem_list->mid_bt[i][0]);
1354 1355

	return 0;
1356
}
static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev,
				  struct hns_roce_hem_list *hem_list, int unit,
				  const struct hns_roce_buf_region *regions,
				  int region_cnt)
{
	struct hns_roce_hem_item *root_hem;
	struct hns_roce_hem_head head;
	int max_ba_num;
	int ret;
	int i;

	root_hem = hem_list_search_item(&hem_list->root_bt, regions[0].offset);
	if (root_hem)
		return 0;

	max_ba_num = 0;
	root_hem = alloc_root_hem(hr_dev, unit, &max_ba_num, regions,
				  region_cnt);
	if (IS_ERR(root_hem))
		return PTR_ERR(root_hem);

	/* List head for storing all allocated HEM items */
	INIT_LIST_HEAD(&head.root);
	INIT_LIST_HEAD(&head.leaf);
1382
	for (i = 0; i < region_cnt; i++)
1383
		INIT_LIST_HEAD(&head.branch[i]);
1384

1385 1386 1387 1388 1389 1390 1391 1392 1393 1394
	hem_list->root_ba = root_hem->dma_addr;
	list_add(&root_hem->list, &head.root);
	ret = setup_root_hem(hr_dev, hem_list, unit, max_ba_num, &head, regions,
			     region_cnt);
	if (ret) {
		for (i = 0; i < region_cnt; i++)
			hem_list_free_all(hr_dev, &head.branch[i], false);

		hem_list_free_all(hr_dev, &head.root, true);
	}
1395 1396 1397 1398 1399 1400 1401 1402

	return ret;
}

/* construct the base address table and link them by address hop config */
int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
			      struct hns_roce_hem_list *hem_list,
			      const struct hns_roce_buf_region *regions,
1403
			      int region_cnt, unsigned int bt_pg_shift)
1404 1405 1406 1407
{
	const struct hns_roce_buf_region *r;
	int ofs, end;
	int unit;
1408
	int ret;
1409 1410 1411 1412 1413 1414 1415 1416
	int i;

	if (region_cnt > HNS_ROCE_MAX_BT_REGION) {
		dev_err(hr_dev->dev, "invalid region region_cnt %d!\n",
			region_cnt);
		return -EINVAL;
	}

1417
	unit = (1 << bt_pg_shift) / BA_BYTE_LEN;
1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463
	for (i = 0; i < region_cnt; i++) {
		r = &regions[i];
		if (!r->count)
			continue;

		end = r->offset + r->count;
		for (ofs = r->offset; ofs < end; ofs += unit) {
			ret = hem_list_alloc_mid_bt(hr_dev, r, unit, ofs,
						    hem_list->mid_bt[i],
						    &hem_list->btm_bt);
			if (ret) {
				dev_err(hr_dev->dev,
					"alloc hem trunk fail ret=%d!\n", ret);
				goto err_alloc;
			}
		}
	}

	ret = hem_list_alloc_root_bt(hr_dev, hem_list, unit, regions,
				     region_cnt);
	if (ret)
		dev_err(hr_dev->dev, "alloc hem root fail ret=%d!\n", ret);
	else
		return 0;

err_alloc:
	hns_roce_hem_list_release(hr_dev, hem_list);

	return ret;
}

void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev,
			       struct hns_roce_hem_list *hem_list)
{
	int i, j;

	for (i = 0; i < HNS_ROCE_MAX_BT_REGION; i++)
		for (j = 0; j < HNS_ROCE_MAX_BT_LEVEL; j++)
			hem_list_free_all(hr_dev, &hem_list->mid_bt[i][j],
					  j != 0);

	hem_list_free_all(hr_dev, &hem_list->root_bt, true);
	INIT_LIST_HEAD(&hem_list->btm_bt);
	hem_list->root_ba = 0;
}

void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list)
1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479
{
	int i, j;

	INIT_LIST_HEAD(&hem_list->root_bt);
	INIT_LIST_HEAD(&hem_list->btm_bt);
	for (i = 0; i < HNS_ROCE_MAX_BT_REGION; i++)
		for (j = 0; j < HNS_ROCE_MAX_BT_LEVEL; j++)
			INIT_LIST_HEAD(&hem_list->mid_bt[i][j]);
}

/*
 * Look up the table entry for buffer page @offset among the items linked
 * on @hem_list->btm_bt.
 *
 * @mtt_cnt:  optional; receives the number of entries from @offset to the
 *            end of the matching item (0 when nothing matches)
 * @phy_addr: optional; receives the DMA address of the entry (0 when
 *            nothing matches)
 *
 * Returns the CPU address of the entry, or NULL when no item covers
 * @offset. @hr_dev is not used here.
 */
void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev,
				 struct hns_roce_hem_list *hem_list,
				 int offset, int *mtt_cnt, u64 *phy_addr)
{
	struct hns_roce_hem_item *item, *next;
	void *entry_cpu = NULL;
	u64 entry_phy = 0;
	int remain = 0;
	int skip;

	list_for_each_entry_safe(item, next, &hem_list->btm_bt, sibling) {
		if (!hem_list_page_is_in_range(item, offset))
			continue;

		/* entries to skip from the start of this item's table */
		skip = offset - item->start;
		entry_cpu = item->addr + skip * BA_BYTE_LEN;
		entry_phy = item->dma_addr + skip * BA_BYTE_LEN;
		remain = item->end + 1 - offset;
		break;
	}

	if (mtt_cnt)
		*mtt_cnt = remain;

	if (phy_addr)
		*phy_addr = entry_phy;

	return entry_cpu;
}