Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Benchmark adds four new data types: STRING BLOB TIMESTAMP DATE #434

Merged
merged 16 commits into from
Aug 15, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions configuration/conf/config.properties
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@

############## 数据:设备、传感器、客户端 ##################
# 设备总数
# DEVICE_NUMBER=6000
# DEVICE_NUMBER=2000
liyuheng55555 marked this conversation as resolved.
Show resolved Hide resolved

# 实际写入设备比例,(0, 1]
# REAL_INSERT_RATE=1.0
Expand Down Expand Up @@ -230,8 +230,8 @@
# 浮点数小数位数
# DOUBLE_LENGTH=2

# 插入数据的数据类型的比例,BOOLEAN:INT32:INT64:FLOAT:DOUBLE:TEXT
# INSERT_DATATYPE_PROPORTION=1:1:1:1:1:1
# 插入数据的数据类型的比例,BOOLEAN:INT32:INT64:FLOAT:DOUBLE:TEXT:STRING:BLOB:TIMESTAMP:DATE
# INSERT_DATATYPE_PROPORTION= 1:1:1:1:1:1:0:0:0:0
liyuheng55555 marked this conversation as resolved.
Show resolved Hide resolved

################ IoTDB相关元数据参数 #####################
# 压缩方式 UNCOMPRESSED | SNAPPY | LZ4 (仅对IoTDB有效)
Expand All @@ -255,6 +255,17 @@
# TEXT: PLAIN/DICTIONARY 等,与对应 tsfile 版本中的 TSEncoding 枚举类型保持一致
# ENCODING_TEXT=DICTIONARY

# STRING: PLAIN 暂不支持DICTIONARY,与对应 tsfile 版本中的 TSEncoding 枚举类型保持一致
# ENCODING_STRING=PLAIN

# BLOB: PLAIN 暂不支持DICTIONARY,与对应 tsfile 版本中的 TSEncoding 枚举类型保持一致
# ENCODING_BLOB=PLAIN

# TIMESTAMP: PLAIN/RLE/TS_2DIFF/GORILLA/ZIGZAG/CHIMP/SPRINTZ/RLBE,与对应 tsfile 版本中的 TSEncoding 枚举类型保持一致
# ENCODING_TIMESTAMP=TS_2DIFF

# DATE: PLAIN/RLE/TS_2DIFF/GORILLA/ZIGZAG/CHIMP/SPRINTZ/RLBE,与对应 tsfile 版本中的 TSEncoding 枚举类型保持一致
# ENCODING_DATE=TS_2DIFF
################ 真实数据集:测试数据 #####################
# 如下两个参数,当且仅当BENCHMARK_MODE = verificationWriteMode 和 verificationQueryMode 时生效
# 数据文件地址
Expand Down
52 changes: 45 additions & 7 deletions core/src/main/java/cn/edu/tsinghua/iot/benchmark/conf/Config.java
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,10 @@ public class Config {
/** The length of double */
private int DOUBLE_LENGTH = 2;
/**
* 插入数据的比例 Data Type, D1:D2:D3:D4:D5:D6 D1: BOOLEAN D2: INT32 D3: INT64 D4: FLOAT D5: DOUBLE D6:
* TEXT
* 插入数据的比例 Data Type, D1:D2:D3:D4:D5:D6:D7:D8:D9:D10 D1: BOOLEAN D2: INT32 D3: INT64 D4: FLOAT
* D5: DOUBLE D6: TEXT D7: STRING D8: BLOB D9: TIMESTAMP D10: DATE
*/
private String INSERT_DATATYPE_PROPORTION = "1:1:1:1:1:1";
private String INSERT_DATATYPE_PROPORTION = "1:1:1:1:1:1:0:0:0:0";

/** The compress of data */
private String COMPRESSOR = "LZ4";
Expand All @@ -145,6 +145,14 @@ public class Config {
private String ENCODING_DOUBLE = "GORILLA";
/** The encoding of text */
private String ENCODING_TEXT = "DICTIONARY";
/** The encoding of string */
private String ENCODING_STRING = "PLAIN";
/** The encoding of blob */
private String ENCODING_BLOB = "PLAIN";
/** The encoding of timestamp */
private String ENCODING_TIMESTAMP = "TS_2DIFF";
/** The encoding of date */
private String ENCODING_DATE = "TS_2DIFF";

// 测试数据相关参数

Expand Down Expand Up @@ -511,7 +519,7 @@ public void initSensorFunction() {

/** According to the number of sensors, initialize the sensor number */
void initSensorCodes() {
int typeNumber = 6;
int typeNumber = 10;
liyuheng55555 marked this conversation as resolved.
Show resolved Hide resolved
double[] probabilities = generateProbabilities(typeNumber);
if (probabilities.length == 0) {
return;
Expand All @@ -529,14 +537,12 @@ void initSensorCodes() {
}
}

/** Generate Probabilities according to proportion(e.g. 1:1:1:1:1:1) */
/** Generate Probabilities according to proportion(e.g. 1:1:1:1:1:1:0:0:0:0) */
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can keep consistent with default value in config here. 1:1:1:1:1:1:0:0:0:0

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK

private double[] generateProbabilities(int typeNumber) {
// Probabilities for Types
double[] probabilities = new double[typeNumber + 1];
// Origin proportion array
double[] proportions = new double[typeNumber];
LOGGER.info(
"Init SensorTypes: BOOLEAN:INT32:INT64:FLOAT:DOUBLE:TEXT= {}", INSERT_DATATYPE_PROPORTION);

String[] split = INSERT_DATATYPE_PROPORTION.split(":");
if (split.length != typeNumber) {
Expand Down Expand Up @@ -801,6 +807,38 @@ public void setENCODING_TEXT(String ENCODING_TEXT) {
this.ENCODING_TEXT = ENCODING_TEXT;
}

/** Returns the encoding currently configured for STRING data. */
public String getENCODING_STRING() {
  return this.ENCODING_STRING;
}

/** Sets the encoding of STRING data; the value is stored as given, with no validation here. */
public void setENCODING_STRING(String ENCODING_STRING) {
this.ENCODING_STRING = ENCODING_STRING;
}

/** Returns the encoding currently configured for BLOB data. */
public String getENCODING_BLOB() {
  return this.ENCODING_BLOB;
}

/** Sets the encoding of BLOB data; the value is stored as given, with no validation here. */
public void setENCODING_BLOB(String ENCODING_BLOB) {
this.ENCODING_BLOB = ENCODING_BLOB;
}

/** Returns the encoding currently configured for TIMESTAMP data. */
public String getENCODING_TIMESTAMP() {
  return this.ENCODING_TIMESTAMP;
}

/** Sets the encoding of TIMESTAMP data; the value is stored as given, with no validation here. */
public void setENCODING_TIMESTAMP(String ENCODING_TIMESTAMP) {
this.ENCODING_TIMESTAMP = ENCODING_TIMESTAMP;
}

/** Returns the encoding currently configured for DATE data. */
public String getENCODING_DATE() {
  return this.ENCODING_DATE;
}

/** Sets the encoding of DATE data; the value is stored as given, with no validation here. */
public void setENCODING_DATE(String ENCODING_DATE) {
this.ENCODING_DATE = ENCODING_DATE;
}

public String getFILE_PATH() {
return FILE_PATH;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,21 @@ private void loadProps() {
properties.getProperty(
"INSERT_DATATYPE_PROPORTION", config.getINSERT_DATATYPE_PROPORTION()));

String INSERT_DATATYPE_PROPORTION = config.getINSERT_DATATYPE_PROPORTION();
String[] split = INSERT_DATATYPE_PROPORTION.split(":");
switch (split.length) {
case 6:
LOGGER.info(
"Init SensorTypes: BOOLEAN:INT32:INT64:FLOAT:DOUBLE:TEXT= {}",
INSERT_DATATYPE_PROPORTION);
break;
case 10:
LOGGER.info(
"Init SensorTypes: BOOLEAN:INT32:INT64:FLOAT:DOUBLE:TEXT:STRING:BLOB:TIMESTAMP:DATE= {}",
INSERT_DATATYPE_PROPORTION);
break;
}
liyuheng55555 marked this conversation as resolved.
Show resolved Hide resolved

config.setCOMPRESSOR(properties.getProperty("COMPRESSOR", config.getCOMPRESSOR()));
config.setENCODING_BOOLEAN(
properties.getProperty("ENCODING_BOOLEAN", config.getENCODING_BOOLEAN()));
Expand All @@ -211,6 +226,12 @@ private void loadProps() {
config.setENCODING_DOUBLE(
properties.getProperty("ENCODING_DOUBLE", config.getENCODING_DOUBLE()));
config.setENCODING_TEXT(properties.getProperty("ENCODING_TEXT", config.getENCODING_TEXT()));
config.setENCODING_STRING(
properties.getProperty("ENCODING_STRING", config.getENCODING_STRING()));
config.setENCODING_BLOB(properties.getProperty("ENCODING_BLOB", config.getENCODING_BLOB()));
config.setENCODING_TIMESTAMP(
properties.getProperty("ENCODING_TIMESTAMP", config.getENCODING_TIMESTAMP()));
config.setENCODING_DATE(properties.getProperty("ENCODING_DATE", config.getENCODING_DATE()));

config.setFILE_PATH(properties.getProperty("FILE_PATH", config.getFILE_PATH()));
config.setBIG_BATCH_SIZE(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,11 @@ public enum SensorType {
INT64("INT64"),
FLOAT("FLOAT"),
DOUBLE("DOUBLE"),
TEXT("TEXT");
TEXT("TEXT"),
STRING("STRING"),
BLOB("BLOB"),
TIMESTAMP("TIMESTAMP"),
DATE("DATE");

public String name;

Expand Down
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

对时序数据为非 iotdb 的场景,目前仍不支持 STRING BLOB TIMESTAMP DATE,因此在 precheck 中添加对类别数的特判。

Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,21 @@ public abstract class BaseMode {

protected abstract boolean preCheck();

// STRING BLOB TIMESTAMP DATE 当前只支持 iotdb
liyuheng55555 marked this conversation as resolved.
Show resolved Hide resolved
/**
 * Checks that INSERT_DATATYPE_PROPORTION is compatible with the configured database.
 *
 * <p>The entries after the first six (STRING, BLOB, TIMESTAMP, DATE) are only supported when the
 * database type is IoTDB or DoubleIoTDB. For any other database each of those entries must be
 * "0"; a non-zero share would ask the benchmark to generate data the target cannot store.
 *
 * @return {@code false} when a non-IoTDB database is given a non-zero proportion for one of the
 *     additional data types, {@code true} otherwise
 */
protected boolean checkInsertDataTypeProportion() {
  // Number of leading proportion entries (BOOLEAN..TEXT) that are valid for every database.
  final int commonTypeCount = 6;
  String dbType = config.getDbConfig().getDB_SWITCH().getType().toString();
  String[] splits = config.getINSERT_DATATYPE_PROPORTION().split(":");
  if (splits.length > commonTypeCount
      && !dbType.equals("IoTDB")
      && !dbType.equals("DoubleIoTDB")) {
    for (int i = commonTypeCount; i < splits.length; i++) {
      // Reject NON-zero shares only: "0" means the unsupported type is disabled, which is the
      // valid case (and the default). The token is trimmed so stray whitespace around a "0"
      // does not cause a spurious failure.
      if (!"0".equals(splits[i].trim())) {
        LOGGER.warn("INSERT_DATATYPE_PROPORTION error, please check this parameter.");
        return false;
      }
    }
  }
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

magic number 6,在Config类定义一个OLD_xxxxx

magic String “IoTDB”,从一些类型枚举类里找找?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typeNumber 已经改成 TYPENUMBER,能够在配置文件中修改。但修改时要保证其与 INSERT_DATATYPE_PROPORTION 的比例数相同。

return true;
}

/** Start benchmark */
public void run() {
if (!preCheck()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ public class GenerateDataMode extends BaseMode {

@Override
protected boolean preCheck() {
if (!checkInsertDataTypeProportion()) return false;
liyuheng55555 marked this conversation as resolved.
Show resolved Hide resolved
return SchemaWriter.getBasicWriter().writeSchema(metaDataSchema.getAllDeviceSchemas());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,16 @@
import cn.edu.tsinghua.iot.benchmark.measurement.persistence.PersistenceFactory;
import cn.edu.tsinghua.iot.benchmark.measurement.persistence.TestDataPersistence;
import cn.edu.tsinghua.iot.benchmark.tsdb.DBConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class TestWithDefaultPathMode extends BaseMode {

private static final Logger LOGGER = LoggerFactory.getLogger(TestWithDefaultPathMode.class);
private static final Config config = ConfigDescriptor.getInstance().getConfig();

@Override
Expand All @@ -51,7 +54,8 @@ protected boolean preCheck() {
if (config.isCREATE_SCHEMA() && (!registerSchema())) {
return false;
}
return true;
// 非 iotdb 时,INSERT_DATATYPE_PROPORTION 只能包含前 6 种数据类型
return checkInsertDataTypeProportion();
liyuheng55555 marked this conversation as resolved.
Show resolved Hide resolved
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ protected boolean preCheck() {
if (config.isCREATE_SCHEMA() && (!registerSchema())) {
return false;
}
return true;
return checkInsertDataTypeProportion();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok 放到了ConfigDescriptor.checkConfig()中,每种benchmark 模式都会进行此项配置检查。

}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@
import cn.edu.tsinghua.iot.benchmark.distribution.PoissonDistribution;
import cn.edu.tsinghua.iot.benchmark.distribution.ProbTool;
import cn.edu.tsinghua.iot.benchmark.entity.Sensor;
import cn.edu.tsinghua.iot.benchmark.entity.enums.SensorType;
import cn.edu.tsinghua.iot.benchmark.exception.WorkloadException;
import cn.edu.tsinghua.iot.benchmark.function.Function;
import cn.edu.tsinghua.iot.benchmark.function.FunctionParam;
import cn.edu.tsinghua.iot.benchmark.utils.TimeUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.time.LocalDate;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
Expand Down Expand Up @@ -152,37 +152,40 @@ private static Object[][] initWorkloadValues() {
// periodic
long currentTimestamp = getCurrentTimestampStatic(i);
Object value;
if (sensor.getSensorType() == SensorType.TEXT) {
// TEXT case: pick STRING_LENGTH chars to be a String for insertion.
StringBuffer builder = new StringBuffer(config.getSTRING_LENGTH());
for (int k = 0; k < config.getSTRING_LENGTH(); k++) {
builder.append(CHAR_TABLE.charAt(dataRandom.nextInt(CHAR_TABLE.length())));
}
value = builder.toString();
} else {
// not TEXT case
FunctionParam param = config.getSENSOR_FUNCTION().get(sensor.getName());
Number number = Function.getValueByFunctionIdAndParam(param, currentTimestamp);
switch (sensor.getSensorType()) {
case BOOLEAN:
value = number.floatValue() > ((param.getMax() + param.getMin()) / 2);
break;
case INT32:
value = number.intValue();
break;
case INT64:
value = number.longValue();
break;
case FLOAT:
value = number.floatValue();
break;
case DOUBLE:
value = Math.round(number.doubleValue() * ratio) / ratio;
break;
default:
value = null;
break;
}
FunctionParam param = config.getSENSOR_FUNCTION().get(sensor.getName());
Number number = Function.getValueByFunctionIdAndParam(param, currentTimestamp);
switch (sensor.getSensorType()) {
case BOOLEAN:
value = number.floatValue() > ((param.getMax() + param.getMin()) / 2);
break;
case INT32:
value = number.intValue();
break;
case INT64:
case TIMESTAMP:
value = number.longValue();
break;
case FLOAT:
value = number.floatValue();
break;
case DOUBLE:
value = Math.round(number.doubleValue() * ratio) / ratio;
break;
case TEXT:
case STRING:
case BLOB:
StringBuffer builder = new StringBuffer(config.getSTRING_LENGTH());
for (int k = 0; k < config.getSTRING_LENGTH(); k++) {
builder.append(CHAR_TABLE.charAt(dataRandom.nextInt(CHAR_TABLE.length())));
}
value = builder.toString();
break;
OneSizeFitsQuorum marked this conversation as resolved.
Show resolved Hide resolved
case DATE:
value = LocalDate.ofEpochDay(number.intValue());
break;
default:
value = null;
break;
liyuheng55555 marked this conversation as resolved.
Show resolved Hide resolved
}
workloadValues[sensorIndex][i] = value;
}
Expand Down
2 changes: 1 addition & 1 deletion iotdb-1.3/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
<properties>
<!-- This was the last version to support Java 8 -->
<logback.version>1.3.14</logback.version>
<iotdb.version>1.3.1</iotdb.version>
<iotdb.version>1.3.3-SNAPSHOT</iotdb.version>
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里不要改成snapshot版本

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

好的

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里因为新数据类型是 1.3.3 版本才有的,为了本地方便可以先搞成快照版本,这样本地 mvn install 就能直接用了。

反正这个 PR 暂不合并,我们大家都 review 完打个包给测试组就行。

等后面开源版 1.3.3 发出来了,我们就替换成 1.3.3,然后合并这个 PR

<okhttp3.version>4.12.0</okhttp3.version>
<gson.version>2.10.1</gson.version>
</properties>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@

import org.apache.iotdb.rpc.IoTDBConnectionException;
import org.apache.iotdb.rpc.StatementExecutionException;
import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
import org.apache.iotdb.tsfile.write.record.Tablet;

import org.apache.tsfile.enums.TSDataType;
import org.apache.tsfile.write.record.Tablet;

import java.util.List;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
import org.apache.iotdb.isession.SessionDataSet;
import org.apache.iotdb.rpc.IoTDBConnectionException;
import org.apache.iotdb.rpc.StatementExecutionException;
import org.apache.iotdb.tsfile.read.common.RowRecord;

import org.apache.tsfile.read.common.RowRecord;

public interface ISessionDataSet {
RowRecord next() throws IoTDBConnectionException, StatementExecutionException;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,6 @@
import org.apache.iotdb.rpc.StatementExecutionException;
import org.apache.iotdb.session.Session;
import org.apache.iotdb.session.template.MeasurementNode;
import org.apache.iotdb.tsfile.file.metadata.enums.CompressionType;
import org.apache.iotdb.tsfile.file.metadata.enums.TSDataType;
import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;

import cn.edu.tsinghua.iot.benchmark.client.operation.Operation;
import cn.edu.tsinghua.iot.benchmark.conf.Config;
Expand Down Expand Up @@ -55,6 +52,9 @@
import cn.edu.tsinghua.iot.benchmark.workload.query.impl.RangeQuery;
import cn.edu.tsinghua.iot.benchmark.workload.query.impl.ValueRangeQuery;
import cn.edu.tsinghua.iot.benchmark.workload.query.impl.VerificationQuery;
import org.apache.tsfile.enums.TSDataType;
import org.apache.tsfile.file.metadata.enums.CompressionType;
import org.apache.tsfile.file.metadata.enums.TSEncoding;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -937,6 +937,14 @@ String getEncodingType(SensorType dataSensorType) {
return config.getENCODING_DOUBLE();
case TEXT:
return config.getENCODING_TEXT();
case STRING:
return config.getENCODING_STRING();
case BLOB:
return config.getENCODING_BLOB();
case TIMESTAMP:
return config.getENCODING_TIMESTAMP();
case DATE:
return config.getENCODING_DATE();
default:
LOGGER.error("Unsupported data sensorType {}.", dataSensorType);
return null;
Expand Down
Loading