Skip to content

Commit

Permalink
[refactor](log) Add some log for table and db transaction manager's l…
Browse files Browse the repository at this point in the history
…ock (#40746)

Use the fe config like #39015, print table lock usage duration.
In 2.0.6, I found `replayer` thread is blocked for a long time.
```
2024-09-12 11:00:58,797 INFO (replayer|89) [DatabaseTransactionMgr.replayUpsertTransactionState():1977] replay a visible transaction TransactionState. transaction id: 471728922, label: flink_connector_20240912_103150_269f8d635ffb455bb4043eecd1a8406b, db id: 475180, table id list: 14473765, callback id: -1, coordinator: BE: 10.217.0.100, transaction status: VISIBLE, error replicas num: 0, replica ids: , prepare time: 1726108310505, commit time: 1726108310579, finish time: 1726108310604, reason: 
2024-09-12 11:00:58,798 WARN (replayer|89) [Env.replayJournal():2575] entityCost:940256 loadJournalCost:940256 logId:1392353661 replayedJournalId:1392353661 code:100 size:562
```
And, there are many table lock fail log:
```
2024-09-12 10:56:53,167 WARN (mysql-nio-pool-32036|412611) [Table.tryReadLock():155] Failed to try table atdpersonpaycode's read lock. timeout 1 MINUTES. Current owner: null
```
But, I don't kown what thread hold the table lock.
So, add some logs to print lock holding info.
  • Loading branch information
liutang123 authored Sep 20, 2024
1 parent 761a8eb commit abefe26
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2092,6 +2092,10 @@ public class Config extends ConfigBase {
@ConfField
public static long lock_reporting_threshold_ms = 500L;

@ConfField(mutable = true, description = {"表示最大锁持有时间,超过该时间会打印告警日志,单位秒",
"Maximum lock hold time; logs a warning if exceeded"})
public static long max_lock_hold_threshold_seconds = 10L;

/**
* If false, when select from tables in information_schema database,
* the result will not contain the information of the table in external catalog.
Expand Down
46 changes: 46 additions & 0 deletions fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import org.apache.doris.alter.AlterCancelException;
import org.apache.doris.analysis.CreateTableStmt;
import org.apache.doris.common.Config;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.MetaNotFoundException;
import org.apache.doris.common.io.Text;
Expand Down Expand Up @@ -73,6 +74,8 @@ public abstract class Table extends MetaObject implements Writable, TableIf {
protected TableType type;
protected long createTime;
protected QueryableReentrantReadWriteLock rwLock;
private final ThreadLocal<Long> lockReadStart = new ThreadLocal<>();
private long lockWriteStart;

/*
* fullSchema and nameToColumn should contains all columns, both visible and shadow.
Expand Down Expand Up @@ -146,6 +149,7 @@ public void unmarkDropped() {

public void readLock() {
this.rwLock.readLock().lock();
lockReadStart.set(System.currentTimeMillis());
}

public boolean tryReadLock(long timeout, TimeUnit unit) {
Expand All @@ -155,6 +159,9 @@ public boolean tryReadLock(long timeout, TimeUnit unit) {
LOG.warn("Failed to try table {}'s read lock. timeout {} {}. Current owner: {}",
name, timeout, unit.name(), rwLock.getOwner());
}
if (res) {
lockReadStart.set(System.currentTimeMillis());
}
return res;
} catch (InterruptedException e) {
LOG.warn("failed to try read lock at table[" + name + "]", e);
Expand All @@ -163,11 +170,16 @@ public boolean tryReadLock(long timeout, TimeUnit unit) {
}

public void readUnlock() {
if (lockReadStart.get() != null) {
checkAndLogLockDuration("read", lockReadStart.get(), System.currentTimeMillis());
lockReadStart.remove();
}
this.rwLock.readLock().unlock();
}

public void writeLock() {
this.rwLock.writeLock().lock();
lockWriteStart = System.currentTimeMillis();
}

public boolean writeLockIfExist() {
Expand All @@ -176,16 +188,49 @@ public boolean writeLockIfExist() {
this.rwLock.writeLock().unlock();
return false;
}
lockWriteStart = System.currentTimeMillis();
return true;
}

private void checkAndLogLockDuration(String type, long lockStart, long lockEnd) {
long duration = lockEnd - lockStart;
if (duration > Config.max_lock_hold_threshold_seconds * 1000) {
StringBuilder msgBuilder = new StringBuilder();
msgBuilder.append(getId())
.append(" ")
.append(getName())
.append(" ")
.append(type)
.append(" lock is held at ")
.append(lockStart)
.append(".And release after ")
.append(duration)
.append(" ms.")
.append("Call stack is :\n")
.append(getStackTrace(Thread.currentThread()));
LOG.info(msgBuilder.toString());
}
}

private static String getStackTrace(Thread t) {
final StackTraceElement[] stackTrace = t.getStackTrace();
StringBuilder msgBuilder = new StringBuilder();
for (StackTraceElement e : stackTrace) {
msgBuilder.append(e.toString()).append("\n");
}
return msgBuilder.toString();
}

public boolean tryWriteLock(long timeout, TimeUnit unit) {
try {
boolean res = this.rwLock.writeLock().tryLock(timeout, unit);
if (!res && unit.toSeconds(timeout) >= 1) {
LOG.warn("Failed to try table {}'s write lock. timeout {} {}. Current owner: {}",
name, timeout, unit.name(), rwLock.getOwner());
}
if (res) {
lockWriteStart = System.currentTimeMillis();
}
return res;
} catch (InterruptedException e) {
LOG.warn("failed to try write lock at table[" + name + "]", e);
Expand All @@ -194,6 +239,7 @@ public boolean tryWriteLock(long timeout, TimeUnit unit) {
}

public void writeUnlock() {
checkAndLogLockDuration("write", lockWriteStart, System.currentTimeMillis());
this.rwLock.writeLock().unlock();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,14 +165,20 @@ private enum PublishResult {
private volatile long usedQuotaDataBytes = -1;

private long lockWriteStart;
private final ThreadLocal<Long> lockReadStart = new ThreadLocal<>();

private long lockReportingThresholdMs = Config.lock_reporting_threshold_ms;
private final long lockReportingThresholdMs = Config.lock_reporting_threshold_ms;

protected void readLock() {
this.transactionLock.readLock().lock();
lockReadStart.set(System.currentTimeMillis());
}

protected void readUnlock() {
if (lockReadStart.get() != null) {
checkAndLogLockDuration("read", lockReadStart.get(), System.currentTimeMillis());
lockReadStart.remove();
}
this.transactionLock.readLock().unlock();
}

Expand All @@ -182,7 +188,7 @@ protected void writeLock() {
}

protected void writeUnlock() {
checkAndLogWriteLockDuration(lockWriteStart, System.currentTimeMillis());
checkAndLogLockDuration("write", lockWriteStart, System.currentTimeMillis());
this.transactionLock.writeLock().unlock();
}

Expand Down Expand Up @@ -2211,11 +2217,12 @@ public long getTxnNumByStatus(TransactionStatus status) {
* @param lockStart holing lock start time.
* @param lockEnd release lock time.
*/
private void checkAndLogWriteLockDuration(long lockStart, long lockEnd) {
private void checkAndLogLockDuration(String type, long lockStart, long lockEnd) {
long duration = lockEnd - lockStart;
if (duration > lockReportingThresholdMs) {
StringBuilder msgBuilder = new StringBuilder();
msgBuilder.append("lock is held at ")
msgBuilder.append(type)
.append(" lock is held at ")
.append(lockStart)
.append(".And release after ")
.append(duration)
Expand Down

0 comments on commit abefe26

Please sign in to comment.