-
Notifications
You must be signed in to change notification settings - Fork 76
Fix resumable binlog position after interrupt #168
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
fe43779
b3cd656
38f9c93
9bf9684
d2dc8cc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,9 +5,10 @@ import ( | |
| "crypto/tls" | ||
| sqlorig "database/sql" | ||
| "fmt" | ||
| sql "github.com/Shopify/ghostferry/sqlwrapper" | ||
| "time" | ||
|
|
||
| sql "github.com/Shopify/ghostferry/sqlwrapper" | ||
|
|
||
| "github.com/siddontang/go-mysql/mysql" | ||
| "github.com/siddontang/go-mysql/replication" | ||
| "github.com/sirupsen/logrus" | ||
|
|
@@ -24,12 +25,13 @@ type BinlogStreamer struct { | |
|
|
||
| TableSchema TableSchemaCache | ||
|
|
||
| binlogSyncer *replication.BinlogSyncer | ||
| binlogStreamer *replication.BinlogStreamer | ||
| lastStreamedBinlogPosition mysql.Position | ||
| targetBinlogPosition mysql.Position | ||
| lastProcessedEventTime time.Time | ||
| lastLagMetricEmittedTime time.Time | ||
| binlogSyncer *replication.BinlogSyncer | ||
| binlogStreamer *replication.BinlogStreamer | ||
| lastStreamedBinlogPosition mysql.Position | ||
| lastResumableBinlogPosition mysql.Position | ||
| targetBinlogPosition mysql.Position | ||
| lastProcessedEventTime time.Time | ||
| lastLagMetricEmittedTime time.Time | ||
|
|
||
| stopRequested bool | ||
|
|
||
|
|
@@ -98,6 +100,7 @@ func (s *BinlogStreamer) ConnectBinlogStreamerToMysqlFrom(startFromBinlogPositio | |
| } | ||
|
|
||
| s.lastStreamedBinlogPosition = startFromBinlogPosition | ||
| s.lastResumableBinlogPosition = startFromBinlogPosition | ||
|
|
||
| s.logger.WithFields(logrus.Fields{ | ||
| "file": s.lastStreamedBinlogPosition.Name, | ||
|
|
@@ -122,7 +125,6 @@ func (s *BinlogStreamer) Run() { | |
| }() | ||
|
|
||
| s.logger.Info("starting binlog streamer") | ||
|
|
||
| for !s.stopRequested || (s.stopRequested && s.lastStreamedBinlogPosition.Compare(s.targetBinlogPosition) < 0) { | ||
| var ev *replication.BinlogEvent | ||
| var timedOut bool | ||
|
|
@@ -153,6 +155,9 @@ func (s *BinlogStreamer) Run() { | |
| case *replication.RotateEvent: | ||
| // This event is needed because we need to update the last successful | ||
| // binlog position. | ||
| s.lastResumableBinlogPosition.Pos = uint32(e.Position) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't we need to set the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Following up on our conversation, we decided to set the name here and in the |
||
| s.lastResumableBinlogPosition.Name = string(e.NextLogName) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wait, I thought we agreed we can't resume from a RotateEvent's position?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the only place where MySQL replication gives us the file name of the binlog. This value is therefore cached until the next Rotate event, which would give us the next file name. So we have to set this filename here?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. well, why can't we use a separate the current filename needs a clear separation from the resumable position. |
||
|
|
||
| s.lastStreamedBinlogPosition.Pos = uint32(e.Position) | ||
| s.lastStreamedBinlogPosition.Name = string(e.NextLogName) | ||
| s.logger.WithFields(logrus.Fields{ | ||
|
|
@@ -165,7 +170,6 @@ func (s *BinlogStreamer) Run() { | |
| s.logger.WithError(err).Error("failed to handle rows event") | ||
| s.ErrorHandler.Fatal("binlog_streamer", err) | ||
| } | ||
|
|
||
| s.updateLastStreamedPosAndTime(ev) | ||
| case *replication.FormatDescriptionEvent: | ||
| // This event has a LogPos = 0, presumably because this is the first | ||
|
|
@@ -183,6 +187,33 @@ func (s *BinlogStreamer) Run() { | |
| // with empty GenericEvent structs. | ||
| // so there's no way to handle this for us. | ||
| continue | ||
| case *replication.XIDEvent, *replication.GTIDEvent: | ||
| // With regards to DMLs, we see (at least) the following sequence | ||
| // of events in the binlog stream: | ||
| // | ||
| // - GTIDEvent <- START of transaction | ||
| // - QueryEvent | ||
| // - RowsQueryEvent | ||
| // - TableMapEvent | ||
| // - RowsEvent | ||
| // - RowsEvent | ||
| // - XIDEvent <- END of transaction | ||
| // | ||
| // *NOTE* | ||
| // | ||
| // First, RowsQueryEvent is only available with `binlog_rows_query_log_events` | ||
| // set to "ON". | ||
| // | ||
| // Second, there will be at least one (but potentially more) RowsEvents | ||
| // depending on the number of rows updated in the transaction. | ||
| // | ||
| // Lastly, GTIDEvents will only be available if they are enabled. | ||
| // | ||
| // As a result, the following case will set the last resumable position for | ||
| // interruption to EITHER the start (if using GTIDs) or the end of the | ||
| // last transaction | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Succinct explanation. Thank you! |
||
| s.lastResumableBinlogPosition.Pos = uint32(ev.Header.LogPos) | ||
|
hkdsun marked this conversation as resolved.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's document in code comments what got us to this? 🙏
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can do 👍 |
||
| s.updateLastStreamedPosAndTime(ev) | ||
| default: | ||
| s.updateLastStreamedPosAndTime(ev) | ||
| } | ||
|
|
@@ -261,7 +292,7 @@ func (s *BinlogStreamer) handleRowsEvent(ev *replication.BinlogEvent) error { | |
| return nil | ||
| } | ||
|
|
||
| dmlEvs, err := NewBinlogDMLEvents(table, ev, pos) | ||
| dmlEvs, err := NewBinlogDMLEvents(table, ev, pos, s.lastResumableBinlogPosition) | ||
| if err != nil { | ||
| return err | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We don't have to do this now, but this was brought up the other day: Since we will eventually start streaming binlog from the target for verification, maybe we should rename this to "stopAtBinlogPosition", or something to that effect.
I just wanted to mention this so we don't forget about it.