Skip to content

Commit

Permalink
Fixing issue of discovering unsigned types (#1864)
Browse files Browse the repository at this point in the history
  • Loading branch information
VardhanThigle committed Sep 13, 2024
1 parent 75f1a2c commit 9926a2e
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ protected static String getSchemaDiscoveryQuery(SourceSchemaReference sourceSche

/**
* Discover Indexed columns and their Collations(if applicable). You could try this on <a href =
* https://www.db-fiddle.com/f/kRVPA5jDwZYNj2rsdtif4K/2>db-fiddle</a>
* https://www.db-fiddle.com/f/kRVPA5jDwZYNj2rsdtif4K/3>db-fiddle</a>
*
* @param sourceSchemaReference
* @return
Expand Down Expand Up @@ -440,7 +440,7 @@ private ImmutableList<SourceColumnIndexInfo> getTableIndexes(
// and collation called "binary".
// Ref https://dev.mysql.com/doc/refman/8.4/en/charset-binary-collations.html
// In information_schema.columns query, these column types show null as character set.
// Ref: https://www.db-fiddle.com/f/kRVPA5jDwZYNj2rsdtif4K/2
// Ref: https://www.db-fiddle.com/f/kRVPA5jDwZYNj2rsdtif4K/3
// Also for both mySQL 5.7 and 8.0 binary columns have a NO-PAD comparison.
// Ref: https://www.db-fiddle.com/f/kRVPA5jDwZYNj2rsdtif4K/0.
if (binaryColumnTypes.contains(columType) && characterSet == null) {
Expand Down Expand Up @@ -719,7 +719,7 @@ public enum MySqlVersion {

protected static final class InformationSchemaCols {
public static final String NAME_COL = "COLUMN_NAME";
public static final String TYPE_COL = "DATA_TYPE";
public static final String TYPE_COL = "COLUMN_TYPE";
public static final String CHAR_MAX_LENGTH_COL = "CHARACTER_MAXIMUM_LENGTH";
public static final String NUMERIC_PRECISION_COL = "NUMERIC_PRECISION";
public static final String NUMERIC_SCALE_COL = "NUMERIC_SCALE";
Expand All @@ -739,7 +739,7 @@ protected static final class InformationSchemaStatsCols {
public static final String NON_UNIQ_COL = "stats.NON_UNIQUE";
public static final String CARDINALITY_COL = "stats.CARDINALITY";

public static final String TYPE_COL = "cols.DATA_TYPE";
public static final String TYPE_COL = "cols.COLUMN_TYPE";
public static final String CHAR_MAX_LENGTH_COL = "cols.CHARACTER_MAXIMUM_LENGTH";
public static final String CHARACTER_SET_COL = "cols.CHARACTER_SET_NAME";
public static final String COLLATION_COL = "cols.COLLATION_NAME";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,10 @@ long getCurrentTimeMicros() {
} catch (SQLException e) {
mapperErrors.inc();
logger.error(
"Exception while mapping jdbc ResultSet to avro. Check for potential schema changes. Exception: "
+ e);
"Exception while mapping jdbc ResultSet to avro. Check for potential schema changes or unexpected inaccuracy in schema discovery logs. SourceSchemaReference: {}, SourceTableSchema: {}. Exception: {}",
sourceSchemaReference,
sourceTableSchema,
e);
throw new ValueMappingException(e);
}
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ public void testGetSchemaDiscoveryQuery() {
MysqlDialectAdapter.getSchemaDiscoveryQuery(
SourceSchemaReference.builder().setDbName("testDB").build()))
.isEqualTo(
"SELECT COLUMN_NAME,DATA_TYPE,CHARACTER_MAXIMUM_LENGTH,NUMERIC_PRECISION,NUMERIC_SCALE FROM INFORMATION_SCHEMA.Columns WHERE TABLE_SCHEMA = 'testDB' AND TABLE_NAME = ?");
"SELECT COLUMN_NAME,COLUMN_TYPE,CHARACTER_MAXIMUM_LENGTH,NUMERIC_PRECISION,NUMERIC_SCALE FROM INFORMATION_SCHEMA.Columns WHERE TABLE_SCHEMA = 'testDB' AND TABLE_NAME = ?");
}

@Test
Expand Down Expand Up @@ -820,7 +820,7 @@ ResultSet createMock() throws SQLException {
.when(rs)
.getString(InformationSchemaCols.NAME_COL);

// mock rs.getString("DATA_TYPE");
// mock rs.getString("COLUMN_TYPE");
doAnswer(
invocation -> {
wasNull = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,9 @@ private Map<String, List<Map<String, Object>>> getExpectedData() {
expectedData.put(
"bigint",
createRows("bigint", "40", "9223372036854775807", "-9223372036854775808", "NULL"));
expectedData.put(
"bigint_unsigned",
createRows("bigint_unsigned", "42", "0", "18446744073709551615", "NULL"));
expectedData.put(
"binary",
createRows("binary", "eDU4MD" + repeatString("A", 334), repeatString("/", 340), "NULL"));
Expand Down Expand Up @@ -185,10 +188,14 @@ private Map<String, List<Map<String, Object>>> getExpectedData() {
expectedData.put(
"mediumblob", createRows("mediumblob", "eDU4MDA=", repeatString("/", 87380), "NULL"));
expectedData.put("mediumint", createRows("mediumint", "20", "NULL"));
expectedData.put(
"mediumint_unsigned", createRows("mediumint_unsigned", "42", "0", "16777215", "NULL"));
expectedData.put(
"mediumtext",
createRows("mediumtext", "mediumtext", repeatString("a", 33) + "...", "NULL"));
expectedData.put("smallint", createRows("smallint", "15", "32767", "-32768", "NULL"));
expectedData.put(
"smallint_unsigned", createRows("smallint_unsigned", "42", "0", "65535", "NULL"));
expectedData.put("text", createRows("text", "xyz", repeatString("a", 33) + "...", "NULL"));
expectedData.put("time", createRows("time", "15:50:00", "838:59:59", "-838:59:59", "NULL"));
expectedData.put(
Expand All @@ -202,6 +209,7 @@ private Map<String, List<Map<String, Object>>> getExpectedData() {
expectedData.put(
"tinyblob", createRows("tinyblob", "eDU4MDA=", repeatString("/", 340), "NULL"));
expectedData.put("tinyint", createRows("tinyint", "10", "127", "-128", "NULL"));
expectedData.put("tinyint_unsigned", createRows("tinyint_unsigned", "0", "255", "NULL"));
expectedData.put(
"tinytext",
createRows("tinytext", "tinytext", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa...", "NULL"));
Expand All @@ -211,6 +219,8 @@ private Map<String, List<Map<String, Object>>> getExpectedData() {
"varchar", createRows("varchar", "abc", repeatString("a", 33) + "...", "NULL"));
expectedData.put("year", createRows("year", "2022", "1901", "2155", "NULL"));
expectedData.put("set", createRows("set", "v1,v2", "NULL"));
// MySQL INT UNSIGNED tops out at 4294967295 (2^32 - 1), so that is the largest value the source can return.
expectedData.put(
"integer_unsigned", createRows("integer_unsigned", "0", "42", "4294967295", "NULL"));
return expectedData;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ CREATE TABLE `tinyint_table` (
`tinyint_col` TINYINT DEFAULT NULL
);

CREATE TABLE `tinyint_unsigned_table` (
`id` INT PRIMARY KEY,
`tinyint_unsigned_col` TINYINT UNSIGNED DEFAULT NULL
);

CREATE TABLE `text_table` (
`id` INT PRIMARY KEY,
`text_col` TEXT CHARACTER SET utf8 DEFAULT NULL
Expand All @@ -23,16 +28,30 @@ CREATE TABLE `smallint_table` (
`smallint_col` SMALLINT DEFAULT NULL
);

CREATE TABLE `smallint_unsigned_table` (
`id` INT PRIMARY KEY,
`smallint_unsigned_col` SMALLINT UNSIGNED DEFAULT NULL
);

CREATE TABLE `mediumint_table` (
`id` INT PRIMARY KEY,
`mediumint_col` MEDIUMINT DEFAULT NULL
);

CREATE TABLE `mediumint_unsigned_table` (
`id` INT PRIMARY KEY,
`mediumint_unsigned_col` MEDIUMINT UNSIGNED DEFAULT NULL
);
CREATE TABLE `bigint_table` (
`id` INT PRIMARY KEY,
`bigint_col` BIGINT DEFAULT NULL
);

-- Source table exercising the full BIGINT UNSIGNED range (0 .. 2^64 - 1)
CREATE TABLE `bigint_unsigned_table` (
`id` INT PRIMARY KEY,
`bigint_unsigned_col` BIGINT UNSIGNED DEFAULT NULL
);

CREATE TABLE `float_table` (
`id` INT PRIMARY KEY,
`float_col` FLOAT DEFAULT NULL
Expand Down Expand Up @@ -143,6 +162,11 @@ CREATE TABLE `int_table` (
`int_col` INT DEFAULT NULL
);

CREATE TABLE `integer_unsigned_table` (
`id` INT PRIMARY KEY,
`integer_unsigned_col` INTEGER UNSIGNED DEFAULT NULL
);

CREATE TABLE `timestamp_table` (
`id` INT PRIMARY KEY,
`timestamp_col` TIMESTAMP DEFAULT NULL
Expand All @@ -154,6 +178,7 @@ CREATE TABLE set_table (
);

ALTER TABLE `bigint_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `bigint_unsigned_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `binary_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `bit_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `blob_table` MODIFY `id` INT AUTO_INCREMENT;
Expand All @@ -167,18 +192,22 @@ ALTER TABLE `double_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `enum_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `float_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `int_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `integer_unsigned_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `json_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `longblob_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `longtext_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `mediumblob_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `mediumint_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `mediumint_unsigned_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `mediumtext_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `smallint_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `smallint_unsigned_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `text_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `time_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `timestamp_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `tinyblob_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `tinyint_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `tinyint_unsigned_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `tinytext_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `varbinary_table` MODIFY `id` INT AUTO_INCREMENT;
ALTER TABLE `varchar_table` MODIFY `id` INT AUTO_INCREMENT;
Expand All @@ -188,6 +217,9 @@ ALTER TABLE `set_table` MODIFY `id` INT AUTO_INCREMENT;
INSERT INTO `bigint_table` (`bigint_col`) VALUES (40);
INSERT INTO `bigint_table` (`bigint_col`) VALUES (9223372036854775807);
INSERT INTO `bigint_table` (`bigint_col`) VALUES (-9223372036854775808);
INSERT INTO `bigint_unsigned_table` (`bigint_unsigned_col`) VALUES (42);
INSERT INTO `bigint_unsigned_table` (`bigint_unsigned_col`) VALUES (0);
INSERT INTO `bigint_unsigned_table` (`bigint_unsigned_col`) VALUES (18446744073709551615);
INSERT INTO `binary_table` (`binary_col`) VALUES (x'7835383030000000000000000000000000000000');
INSERT INTO `binary_table` (`binary_col`) VALUES (REPEAT(X'FF', 255));
INSERT INTO `bit_table` (`bit_col`) VALUES (b'0111111111111111111111111111111111111111111111111111111111111111');
Expand Down Expand Up @@ -218,6 +250,9 @@ INSERT INTO `float_table` (`float_col`) VALUES (-3.4E38);
INSERT INTO `int_table` (`int_col`) VALUES (30);
INSERT INTO `int_table` (`int_col`) VALUES (2147483647);
INSERT INTO `int_table` (`int_col`) VALUES (-2147483648);
INSERT INTO `integer_unsigned_table` (`integer_unsigned_col`) VALUES (0);
INSERT INTO `integer_unsigned_table` (`integer_unsigned_col`) VALUES (42);
-- Upper boundary of INT UNSIGNED is 4294967295 (2^32 - 1), a larger literal is rejected or clipped by MySQL
INSERT INTO `integer_unsigned_table` (`integer_unsigned_col`) VALUES (4294967295);
INSERT INTO `json_table` (`json_col`) VALUES ('{"k1": "v1"}');
INSERT INTO `longblob_table` (`longblob_col`) VALUES (X'7835383030');
INSERT INTO `longblob_table` (`longblob_col`) VALUES (REPEAT(X'FF', 65535));
Expand All @@ -226,11 +261,17 @@ INSERT INTO `longtext_table` (`longtext_col`) VALUES (REPEAT('a', 65535));
INSERT INTO `mediumblob_table` (`mediumblob_col`) VALUES (X'7835383030');
INSERT INTO `mediumblob_table` (`mediumblob_col`) VALUES (REPEAT(X'FF', 65535));
INSERT INTO `mediumint_table` (`mediumint_col`) VALUES (20);
INSERT INTO `mediumint_unsigned_table` (`mediumint_unsigned_col`) VALUES (42);
INSERT INTO `mediumint_unsigned_table` (`mediumint_unsigned_col`) VALUES (0);
INSERT INTO `mediumint_unsigned_table` (`mediumint_unsigned_col`) VALUES (16777215);
INSERT INTO `mediumtext_table` (`mediumtext_col`) VALUES ('mediumtext');
INSERT INTO `mediumtext_table` (`mediumtext_col`) VALUES (REPEAT('a', 65535));
INSERT INTO `smallint_table` (`smallint_col`) VALUES (15);
INSERT INTO `smallint_table` (`smallint_col`) VALUES (32767);
INSERT INTO `smallint_table` (`smallint_col`) VALUES (-32768);
INSERT INTO `smallint_unsigned_table` (`smallint_unsigned_col`) VALUES (42);
INSERT INTO `smallint_unsigned_table` (`smallint_unsigned_col`) VALUES (0);
INSERT INTO `smallint_unsigned_table` (`smallint_unsigned_col`) VALUES (65535);
INSERT INTO `text_table` (`text_col`) VALUES ('xyz');
INSERT INTO `text_table` (`text_col`) VALUES (REPEAT('a', 65535));
INSERT INTO `time_table` (`time_col`) VALUES ('15:50:00');
Expand All @@ -244,6 +285,8 @@ INSERT INTO `tinyblob_table` (`tinyblob_col`) VALUES (REPEAT(X'FF', 255));
INSERT INTO `tinyint_table` (`tinyint_col`) VALUES (10);
INSERT INTO `tinyint_table` (`tinyint_col`) VALUES (127);
INSERT INTO `tinyint_table` (`tinyint_col`) VALUES (-128);
INSERT INTO `tinyint_unsigned_table` (`tinyint_unsigned_col`) VALUES (0);
INSERT INTO `tinyint_unsigned_table` (`tinyint_unsigned_col`) VALUES (255);
INSERT INTO `tinytext_table` (`tinytext_col`) VALUES ('tinytext');
INSERT INTO `tinytext_table` (`tinytext_col`) VALUES (REPEAT('a', 255));
INSERT INTO `varbinary_table` (`varbinary_col`) VALUES (X'7835383030');
Expand All @@ -256,6 +299,7 @@ INSERT INTO `year_table` (`year_col`) VALUES (2155);
INSERT INTO `set_table` (`set_col`) VALUES ('v1,v2');

INSERT INTO `bigint_table` (`bigint_col`) VALUES (NULL);
INSERT INTO `bigint_unsigned_table` (`bigint_unsigned_col`) VALUES (NULL);
INSERT INTO `binary_table` (`binary_col`) VALUES (NULL);
INSERT INTO `bit_table` (`bit_col`) VALUES (NULL);
INSERT INTO `blob_table` (`blob_col`) VALUES (NULL);
Expand All @@ -269,18 +313,22 @@ INSERT INTO `double_table` (`double_col`) VALUES (NULL);
INSERT INTO `enum_table` (`enum_col`) VALUES (NULL);
INSERT INTO `float_table` (`float_col`) VALUES (NULL);
INSERT INTO `int_table` (`int_col`) VALUES (NULL);
INSERT INTO `integer_unsigned_table` (`integer_unsigned_col`) VALUES (NULL);
INSERT INTO `json_table` (`json_col`) VALUES (NULL);
INSERT INTO `longblob_table` (`longblob_col`) VALUES (NULL);
INSERT INTO `longtext_table` (`longtext_col`) VALUES (NULL);
INSERT INTO `mediumblob_table` (`mediumblob_col`) VALUES (NULL);
INSERT INTO `mediumint_table` (`mediumint_col`) VALUES (NULL);
INSERT INTO `mediumint_unsigned_table` (`mediumint_unsigned_col`) VALUES (NULL);
INSERT INTO `mediumtext_table` (`mediumtext_col`) VALUES (NULL);
INSERT INTO `smallint_table` (`smallint_col`) VALUES (NULL);
INSERT INTO `smallint_unsigned_table` (`smallint_unsigned_col`) VALUES (NULL);
INSERT INTO `text_table` (`text_col`) VALUES (NULL);
INSERT INTO `time_table` (`time_col`) VALUES (NULL);
INSERT INTO `timestamp_table` (`timestamp_col`) VALUES (NULL);
INSERT INTO `tinyblob_table` (`tinyblob_col`) VALUES (NULL);
INSERT INTO `tinyint_table` (`tinyint_col`) VALUES (NULL);
INSERT INTO `tinyint_unsigned_table` (`tinyint_unsigned_col`) VALUES (NULL);
INSERT INTO `tinytext_table` (`tinytext_col`) VALUES (NULL);
INSERT INTO `varbinary_table` (`varbinary_col`) VALUES (NULL);
INSERT INTO `varchar_table` (`varchar_col`) VALUES (NULL);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ CREATE TABLE bigint_table (
bigint_col INT64,
) PRIMARY KEY(id);

CREATE TABLE bigint_unsigned_table (
id INT64 NOT NULL,
bigint_unsigned_col INT64,
) PRIMARY KEY(id);

CREATE TABLE binary_table (
id INT64 NOT NULL,
binary_col BYTES(MAX),
Expand Down Expand Up @@ -68,6 +73,11 @@ CREATE TABLE int_table (
int_col INT64,
) PRIMARY KEY(id);

CREATE TABLE integer_unsigned_table (
id INT64 NOT NULL,
integer_unsigned_col INT64,
) PRIMARY KEY(id);

CREATE TABLE json_table (
id INT64 NOT NULL,
json_col JSON,
Expand All @@ -89,10 +99,16 @@ CREATE TABLE mediumblob_table (
mediumblob_col BYTES(MAX),
) PRIMARY KEY(id);

CREATE TABLE mediumint_table (
id INT64 NOT NULL,
mediumint_col INT64,
) PRIMARY KEY(id);
CREATE TABLE mediumint_table (
id INT64 NOT NULL,
mediumint_col INT64,
) PRIMARY KEY(id);

CREATE TABLE mediumint_unsigned_table (
id INT64 NOT NULL,
mediumint_unsigned_col INT64,
) PRIMARY KEY(id);


CREATE TABLE mediumtext_table (
id INT64 NOT NULL,
Expand Down Expand Up @@ -134,6 +150,11 @@ CREATE TABLE tinyint_table (
tinyint_col INT64,
) PRIMARY KEY(id);

CREATE TABLE tinyint_unsigned_table (
id INT64 NOT NULL,
tinyint_unsigned_col INT64,
) PRIMARY KEY(id);

CREATE TABLE tinytext_table (
id INT64 NOT NULL,
tinytext_col STRING(MAX),
Expand Down

0 comments on commit 9926a2e

Please sign in to comment.