共计 15625 个字符,预计需要花费 40 分钟才能阅读完成。
OAL
基础知识
基本介绍
OAL(Observability Analysis Language) 是一门用来分析流式数据的语言。
因为 OAL 聚焦于度量 Service、Service Instance 和 Endpoint 的指标,所以它学习和使用起来非常简单。
OAL 基于 antlr 与 javassist 将 oal 脚本转化为动态生成的类文件。
自从 6.3 版本后,OAL 引擎内置在 OAP 服务器中,可以看做 oal-rt(OAL Runtime)。
OAL 脚本位于 OAL 配置目录下 (/config/oal),使用者可以更改脚本并重启生效。
注意:OAL 脚本依然是一门编译语言,oal-rt 动态地生成 Java 代码。
如果你配置了环境变量 SW_OAL_ENGINE_DEBUG=Y,能在工作目录下的 oal-rt 目录下找到生成的 Class 文件。
语法
// Declare a metric
METRICS_NAME = from(SCOPE.(* | [FIELD][,FIELD ...])) // read data from a SCOPE
[.filter(FIELD OP [INT | STRING])] // optionally filter out part of the data
.FUNCTION([PARAM][, PARAM ...]) // aggregate the data with an aggregation function
// Disable a metric
disable(METRICS_NAME);
语法案例
oap-server/server-bootstrap/src/main/resources/oal/java-agent.oal
// Read `used` from ServiceInstanceJVMMemory, keep only rows whose heapStatus is true,
// and aggregate them as a long average.
instance_jvm_memory_heap = from(ServiceInstanceJVMMemory.used).filter(heapStatus == true).longAvg();
org.apache.skywalking.oap.server.core.source.ServiceInstanceJVMMemory
@ScopeDeclaration(id = SERVICE_INSTANCE_JVM_MEMORY, name = "ServiceInstanceJVMMemory", catalog = SERVICE_INSTANCE_CATALOG_NAME) | |
@ScopeDefaultColumn.VirtualColumnDefinition(fieldName = "entityId", columnName = "entity_id", isID = true, type = String.class) | |
public class ServiceInstanceJVMMemory extends Source { | |
@Override | |
public int scope() {return DefaultScopeDefine.SERVICE_INSTANCE_JVM_MEMORY;} | |
@Override | |
public String getEntityId() {return String.valueOf(id); | |
} | |
@Getter @Setter | |
private String id; | |
@Getter @Setter @ScopeDefaultColumn.DefinedByField(columnName = "name", requireDynamicActive = true) | |
private String name; | |
@Getter @Setter @ScopeDefaultColumn.DefinedByField(columnName = "service_name", requireDynamicActive = true) | |
private String serviceName; | |
@Getter @Setter @ScopeDefaultColumn.DefinedByField(columnName = "service_id") | |
private String serviceId; | |
@Getter @Setter | |
private boolean heapStatus; | |
@Getter @Setter | |
private long init; | |
@Getter @Setter | |
private long max; | |
@Getter @Setter | |
private long used; | |
@Getter @Setter | |
private long committed; | |
} |
可供参考的官网文档:Observability Analysis Language
从一个案例开始分析 OAL 原理
缺少的类加载信息监控
默认的 APM/Instance 页面,缺少关于 JVM Class 的信息(如下图所示),故这次将相关信息补齐。由这次案例来分析 OAL 的原理。
在 Skywalking-04:扩展 Metric 监控信息 中,讲到了如何在已有 Source 类的情况下,增加一些指标。
这次直接连 Source 类以及 OAL 词法语法关键字都自己定义。
可供参考的官网文档:Source and Scope extension for new metrics
确定增加的指标
通过 Java ManagementFactory 解析这篇文章,可以确定监控指标为“当前加载类的数量”、“已卸载类的数量”、“一共加载类的数量”三个指标
ClassLoadingMXBean classLoadingMXBean = ManagementFactory.getClassLoadingMXBean(); | |
// 以后加载类的数量 | |
int loadedClassCount = classLoadingMXBean.getLoadedClassCount(); | |
// 已卸载类的数量 | |
long unloadedClassCount = classLoadingMXBean.getUnloadedClassCount(); | |
// 一共加载类的数量 | |
long totalLoadedClassCount = classLoadingMXBean.getTotalLoadedClassCount(); |
定义 agent
与 oap server
通信类
在 apm-protocol/apm-network/src/main/proto/language-agent/JVMMetric.proto 协议文件中增加如下定义。
在 apm-protocol/apm-network 目录下执行 mvn clean package -DskipTests=true 会生成新的相关 Java 类,
org.apache.skywalking.apm.network.language.agent.v3.Class 该类就是我们在代码中实际操作的类。
// Class-loading counters reported by the agent.
// Field numbers follow declaration order so the wire layout stays unambiguous.
message Class {
    int64 loadedClassCount = 1;
    int64 unloadedClassCount = 2;
    int64 totalLoadedClassCount = 3;
}
message JVMMetric {
    int64 time = 1;
    CPU cpu = 2;
    repeated Memory memory = 3;
    repeated MemoryPool memoryPool = 4;
    repeated GC gc = 5;
    Thread thread = 6;
    // Class-loading metrics added to the JVM metric message
    Class clazz = 7;
}
收集 agent
的信息后,将信息发送至 oap server
收集 Class 相关的指标信息
package org.apache.skywalking.apm.agent.core.jvm.clazz;

import org.apache.skywalking.apm.network.language.agent.v3.Class;
import java.lang.management.ClassLoadingMXBean;
import java.lang.management.ManagementFactory;

/**
 * Enum singleton that reads the class-loading counters exposed by the platform
 * {@link ClassLoadingMXBean} and packs them into the generated protobuf {@code Class} message.
 *
 * <p>Note that {@code Class} in this file is the imported protobuf type, not {@code java.lang.Class}.
 */
public enum ClassProvider {
    /** The single shared instance. */
    INSTANCE;

    private final ClassLoadingMXBean classLoadingMXBean;

    ClassProvider() {
        this.classLoadingMXBean = ManagementFactory.getClassLoadingMXBean();
    }

    /**
     * Builds the class metrics message from the current MXBean readings.
     *
     * @return a protobuf {@code Class} holding the loaded, unloaded and total-loaded class counts
     */
    public Class getClassMetrics() {
        int loadedClassCount = classLoadingMXBean.getLoadedClassCount();
        long unloadedClassCount = classLoadingMXBean.getUnloadedClassCount();
        long totalLoadedClassCount = classLoadingMXBean.getTotalLoadedClassCount();
        return Class.newBuilder()
                    .setLoadedClassCount(loadedClassCount)
                    .setUnloadedClassCount(unloadedClassCount)
                    .setTotalLoadedClassCount(totalLoadedClassCount)
                    .build();
    }
}
在 org.apache.skywalking.apm.agent.core.jvm.JVMService#run 方法中,将 class 相关指标设置到 JVM 指标类中
@Override | |
public void run() {long currentTimeMillis = System.currentTimeMillis(); | |
try {JVMMetric.Builder jvmBuilder = JVMMetric.newBuilder(); | |
jvmBuilder.setTime(currentTimeMillis); | |
jvmBuilder.setCpu(CPUProvider.INSTANCE.getCpuMetric()); | |
jvmBuilder.addAllMemory(MemoryProvider.INSTANCE.getMemoryMetricList()); | |
jvmBuilder.addAllMemoryPool(MemoryPoolProvider.INSTANCE.getMemoryPoolMetricsList()); | |
jvmBuilder.addAllGc(GCProvider.INSTANCE.getGCList()); | |
jvmBuilder.setThread(ThreadProvider.INSTANCE.getThreadMetrics()); | |
// 设置 class 的指标 | |
jvmBuilder.setClazz(ClassProvider.INSTANCE.getClassMetrics()); | |
// 将 JVM 的指标放在阻塞队列中 | |
// org.apache.skywalking.apm.agent.core.jvm.JVMMetricsSender#run 办法,会将相干信息发送至 oap server | |
sender.offer(jvmBuilder.build()); | |
} catch (Exception e) {LOGGER.error(e, "Collect JVM info fail."); | |
} | |
} |
创建 Source 类
/**
 * Scope constants used by the source classes in this walkthrough
 * (excerpt: only the members relevant to the new class metrics are shown).
 */
public class DefaultScopeDefine {
    /** Scope id used by the ServiceInstanceJVMClass source. */
    public static final int SERVICE_INSTANCE_JVM_CLASS = 11000;

    /** Catalog of scope, the metrics processor could use this to group all generated metrics by oal rt. */
    public static final String SERVICE_INSTANCE_CATALOG_NAME = "SERVICE_INSTANCE";
}
package org.apache.skywalking.oap.server.core.source; | |
import lombok.Getter; | |
import lombok.Setter; | |
import static org.apache.skywalking.oap.server.core.source.DefaultScopeDefine.SERVICE_INSTANCE_CATALOG_NAME; | |
import static org.apache.skywalking.oap.server.core.source.DefaultScopeDefine.SERVICE_INSTANCE_JVM_CLASS; | |
@ScopeDeclaration(id = SERVICE_INSTANCE_JVM_CLASS, name = "ServiceInstanceJVMClass", catalog = SERVICE_INSTANCE_CATALOG_NAME) | |
@ScopeDefaultColumn.VirtualColumnDefinition(fieldName = "entityId", columnName = "entity_id", isID = true, type = String.class) | |
public class ServiceInstanceJVMClass extends Source { | |
@Override | |
public int scope() {return SERVICE_INSTANCE_JVM_CLASS;} | |
@Override | |
public String getEntityId() {return String.valueOf(id); | |
} | |
@Getter @Setter | |
private String id; | |
@Getter @Setter @ScopeDefaultColumn.DefinedByField(columnName = "name", requireDynamicActive = true) | |
private String name; | |
@Getter @Setter @ScopeDefaultColumn.DefinedByField(columnName = "service_name", requireDynamicActive = true) | |
private String serviceName; | |
@Getter @Setter @ScopeDefaultColumn.DefinedByField(columnName = "service_id") | |
private String serviceId; | |
@Getter @Setter | |
private long loadedClassCount; | |
@Getter @Setter | |
private long unloadedClassCount; | |
@Getter @Setter | |
private long totalLoadedClassCount; | |
} |
将从 agent 获取到的信息,发送至 SourceReceiver
在 org.apache.skywalking.oap.server.analyzer.provider.jvm.JVMSourceDispatcher 进行如下修改
public void sendMetric(String service, String serviceInstance, JVMMetric metrics) {long minuteTimeBucket = TimeBucket.getMinuteTimeBucket(metrics.getTime()); | |
final String serviceId = IDManager.ServiceID.buildId(service, NodeType.Normal); | |
final String serviceInstanceId = IDManager.ServiceInstanceID.buildId(serviceId, serviceInstance); | |
this.sendToCpuMetricProcess(service, serviceId, serviceInstance, serviceInstanceId, minuteTimeBucket, metrics.getCpu()); | |
this.sendToMemoryMetricProcess(service, serviceId, serviceInstance, serviceInstanceId, minuteTimeBucket, metrics.getMemoryList()); | |
this.sendToMemoryPoolMetricProcess(service, serviceId, serviceInstance, serviceInstanceId, minuteTimeBucket, metrics.getMemoryPoolList()); | |
this.sendToGCMetricProcess(service, serviceId, serviceInstance, serviceInstanceId, minuteTimeBucket, metrics.getGcList()); | |
this.sendToThreadMetricProcess(service, serviceId, serviceInstance, serviceInstanceId, minuteTimeBucket, metrics.getThread()); | |
// class 指标解决 | |
this.sendToClassMetricProcess(service, serviceId, serviceInstance, serviceInstanceId, minuteTimeBucket, metrics.getClazz()); | |
} | |
private void sendToClassMetricProcess(String service, | |
String serviceId, | |
String serviceInstance, | |
String serviceInstanceId, | |
long timeBucket, | |
Class clazz) { | |
// 拼装 Source 对象 | |
ServiceInstanceJVMClass serviceInstanceJVMClass = new ServiceInstanceJVMClass(); | |
serviceInstanceJVMClass.setId(serviceInstanceId); | |
serviceInstanceJVMClass.setName(serviceInstance); | |
serviceInstanceJVMClass.setServiceId(serviceId); | |
serviceInstanceJVMClass.setServiceName(service); | |
serviceInstanceJVMClass.setLoadedClassCount(clazz.getLoadedClassCount()); | |
serviceInstanceJVMClass.setUnloadedClassCount(clazz.getUnloadedClassCount()); | |
serviceInstanceJVMClass.setTotalLoadedClassCount(clazz.getTotalLoadedClassCount()); | |
serviceInstanceJVMClass.setTimeBucket(timeBucket); | |
// 将 Source 对象发送至 SourceReceive 进行解决 | |
sourceReceiver.receive(serviceInstanceJVMClass); | |
} |
在 OAL 词法定义和语法定义中加入 Source 相关信息
在 oap-server/oal-grammar/src/main/antlr4/org/apache/skywalking/oal/rt/grammar/OALLexer.g4
定义 Class
关键字
// Keywords
FROM: 'from';
FILTER: 'filter';
DISABLE: 'disable';
SRC_ALL: 'All';
SRC_SERVICE: 'Service';
SRC_SERVICE_INSTANCE: 'ServiceInstance';
SRC_ENDPOINT: 'Endpoint';
SRC_SERVICE_RELATION: 'ServiceRelation';
SRC_SERVICE_INSTANCE_RELATION: 'ServiceInstanceRelation';
SRC_ENDPOINT_RELATION: 'EndpointRelation';
SRC_SERVICE_INSTANCE_JVM_CPU: 'ServiceInstanceJVMCPU';
SRC_SERVICE_INSTANCE_JVM_MEMORY: 'ServiceInstanceJVMMemory';
SRC_SERVICE_INSTANCE_JVM_MEMORY_POOL: 'ServiceInstanceJVMMemoryPool';
SRC_SERVICE_INSTANCE_JVM_GC: 'ServiceInstanceJVMGC';
SRC_SERVICE_INSTANCE_JVM_THREAD: 'ServiceInstanceJVMThread';
SRC_SERVICE_INSTANCE_JVM_CLASS: 'ServiceInstanceJVMClass'; // new keyword for the class-metrics source
SRC_DATABASE_ACCESS: 'DatabaseAccess';
SRC_SERVICE_INSTANCE_CLR_CPU: 'ServiceInstanceCLRCPU';
SRC_SERVICE_INSTANCE_CLR_GC: 'ServiceInstanceCLRGC';
SRC_SERVICE_INSTANCE_CLR_THREAD: 'ServiceInstanceCLRThread';
SRC_ENVOY_INSTANCE_METRIC: 'EnvoyInstanceMetric';
在 oap-server/oal-grammar/src/main/antlr4/org/apache/skywalking/oal/rt/grammar/OALParser.g4
增加 Class
关键字
source
    : SRC_ALL | SRC_SERVICE | SRC_DATABASE_ACCESS | SRC_SERVICE_INSTANCE | SRC_ENDPOINT |
      SRC_SERVICE_RELATION | SRC_SERVICE_INSTANCE_RELATION | SRC_ENDPOINT_RELATION |
      SRC_SERVICE_INSTANCE_JVM_CPU | SRC_SERVICE_INSTANCE_JVM_MEMORY | SRC_SERVICE_INSTANCE_JVM_MEMORY_POOL |
      SRC_SERVICE_INSTANCE_JVM_GC | SRC_SERVICE_INSTANCE_JVM_THREAD | SRC_SERVICE_INSTANCE_JVM_CLASS | // keyword declared in the lexer for the class-metrics source
      SRC_SERVICE_INSTANCE_CLR_CPU | SRC_SERVICE_INSTANCE_CLR_GC | SRC_SERVICE_INSTANCE_CLR_THREAD |
      SRC_ENVOY_INSTANCE_METRIC |
      SRC_BROWSER_APP_PERF | SRC_BROWSER_APP_PAGE_PERF | SRC_BROWSER_APP_SINGLE_VERSION_PERF |
      SRC_BROWSER_APP_TRAFFIC | SRC_BROWSER_APP_PAGE_TRAFFIC | SRC_BROWSER_APP_SINGLE_VERSION_TRAFFIC
    ;
在 oap-server/oal-grammar 目录下执行 mvn clean package -DskipTests=true 会生成新的相关 Java 类
定义 OAL
指标
在 oap-server/server-bootstrap/src/main/resources/oal/java-agent.oal 中增加基于 OAL 语法的 Class 相关指标定义
// Number of classes currently loaded
instance_jvm_class_loaded_class_count = from(ServiceInstanceJVMClass.loadedClassCount).longAvg();
// Number of classes unloaded so far
instance_jvm_class_unloaded_class_count = from(ServiceInstanceJVMClass.unloadedClassCount).longAvg();
// Total number of classes loaded so far
instance_jvm_class_total_loaded_class_count = from(ServiceInstanceJVMClass.totalLoadedClassCount).longAvg();
配置 UI
面板
将如下界面配置导入 APM
面板中
{ | |
"name": "Instance", | |
"children": [{ | |
"width": "3", | |
"title": "Service Instance Load", | |
"height": "250", | |
"entityType": "ServiceInstance", | |
"independentSelector": false, | |
"metricType": "REGULAR_VALUE", | |
"metricName": "service_instance_cpm", | |
"queryMetricType": "readMetricsValues", | |
"chartType": "ChartLine", | |
"unit": "CPM - calls per minute" | |
}, | |
{ | |
"width": 3, | |
"title": "Service Instance Throughput", | |
"height": "250", | |
"entityType": "ServiceInstance", | |
"independentSelector": false, | |
"metricType": "REGULAR_VALUE", | |
"metricName": "service_instance_throughput_received,service_instance_throughput_sent", | |
"queryMetricType": "readMetricsValues", | |
"chartType": "ChartLine", | |
"unit": "Bytes" | |
}, | |
{ | |
"width": "3", | |
"title": "Service Instance Successful Rate", | |
"height": "250", | |
"entityType": "ServiceInstance", | |
"independentSelector": false, | |
"metricType": "REGULAR_VALUE", | |
"metricName": "service_instance_sla", | |
"queryMetricType": "readMetricsValues", | |
"chartType": "ChartLine", | |
"unit": "%", | |
"aggregation": "/", | |
"aggregationNum": "100" | |
}, | |
{ | |
"width": "3", | |
"title": "Service Instance Latency", | |
"height": "250", | |
"entityType": "ServiceInstance", | |
"independentSelector": false, | |
"metricType": "REGULAR_VALUE", | |
"metricName": "service_instance_resp_time", | |
"queryMetricType": "readMetricsValues", | |
"chartType": "ChartLine", | |
"unit": "ms" | |
}, | |
{ | |
"width": 3, | |
"title": "JVM CPU (Java Service)", | |
"height": "250", | |
"entityType": "ServiceInstance", | |
"independentSelector": false, | |
"metricType": "REGULAR_VALUE", | |
"metricName": "instance_jvm_cpu", | |
"queryMetricType": "readMetricsValues", | |
"chartType": "ChartLine", | |
"unit": "%", | |
"aggregation": "+", | |
"aggregationNum": "" | |
}, | |
{ | |
"width": 3, | |
"title": "JVM Memory (Java Service)", | |
"height": "250", | |
"entityType": "ServiceInstance", | |
"independentSelector": false, | |
"metricType": "REGULAR_VALUE", | |
"metricName": "instance_jvm_memory_heap, instance_jvm_memory_heap_max,instance_jvm_memory_noheap, instance_jvm_memory_noheap_max", | |
"queryMetricType": "readMetricsValues", | |
"chartType": "ChartLine", | |
"unit": "MB", | |
"aggregation": "/", | |
"aggregationNum": "1048576" | |
}, | |
{ | |
"width": 3, | |
"title": "JVM GC Time", | |
"height": "250", | |
"entityType": "ServiceInstance", | |
"independentSelector": false, | |
"metricType": "REGULAR_VALUE", | |
"metricName": "instance_jvm_young_gc_time, instance_jvm_old_gc_time", | |
"queryMetricType": "readMetricsValues", | |
"chartType": "ChartLine", | |
"unit": "ms" | |
}, | |
{ | |
"width": 3, | |
"title": "JVM GC Count", | |
"height": "250", | |
"entityType": "ServiceInstance", | |
"independentSelector": false, | |
"metricType": "REGULAR_VALUE", | |
"queryMetricType": "readMetricsValues", | |
"chartType": "ChartBar", | |
"metricName": "instance_jvm_young_gc_count, instance_jvm_old_gc_count" | |
}, | |
{ | |
"width": 3, | |
"title": "JVM Thread Count (Java Service)", | |
"height": "250", | |
"entityType": "ServiceInstance", | |
"independentSelector": false, | |
"metricType": "REGULAR_VALUE", | |
"queryMetricType": "readMetricsValues", | |
"chartType": "ChartLine", | |
"metricName": "instance_jvm_thread_live_count, instance_jvm_thread_daemon_count, instance_jvm_thread_peak_count,instance_jvm_thread_deadlocked,instance_jvm_thread_monitor_deadlocked" | |
}, | |
{ | |
"width": 3, | |
"title": "JVM Thread State Count (Java Service)", | |
"height": "250", | |
"entityType": "ServiceInstance", | |
"independentSelector": false, | |
"metricType": "REGULAR_VALUE", | |
"metricName": "instance_jvm_thread_new_thread_count,instance_jvm_thread_runnable_thread_count,instance_jvm_thread_blocked_thread_count,instance_jvm_thread_wait_thread_count,instance_jvm_thread_time_wait_thread_count,instance_jvm_thread_terminated_thread_count", | |
"queryMetricType": "readMetricsValues", | |
"chartType": "ChartBar" | |
}, | |
{ | |
"width": 3, | |
"title": "JVM Class Count (Java Service)", | |
"height": "250", | |
"entityType": "ServiceInstance", | |
"independentSelector": false, | |
"metricType": "REGULAR_VALUE", | |
"metricName": "instance_jvm_class_loaded_class_count,instance_jvm_class_unloaded_class_count,instance_jvm_class_total_loaded_class_count", | |
"queryMetricType": "readMetricsValues", | |
"chartType": "ChartArea" | |
}, | |
{ | |
"width": 3, | |
"title": "CLR CPU (.NET Service)", | |
"height": "250", | |
"entityType": "ServiceInstance", | |
"independentSelector": false, | |
"metricType": "REGULAR_VALUE", | |
"metricName": "instance_clr_cpu", | |
"queryMetricType": "readMetricsValues", | |
"chartType": "ChartLine", | |
"unit": "%" | |
}, | |
{ | |
"width": 3, | |
"title": "CLR GC (.NET Service)", | |
"height": "250", | |
"entityType": "ServiceInstance", | |
"independentSelector": false, | |
"metricType": "REGULAR_VALUE", | |
"metricName": "instance_clr_gen0_collect_count, instance_clr_gen1_collect_count, instance_clr_gen2_collect_count", | |
"queryMetricType": "readMetricsValues", | |
"chartType": "ChartBar" | |
}, | |
{ | |
"width": 3, | |
"title": "CLR Heap Memory (.NET Service)", | |
"height": "250", | |
"entityType": "ServiceInstance", | |
"independentSelector": false, | |
"metricType": "REGULAR_VALUE", | |
"metricName": "instance_clr_heap_memory", | |
"queryMetricType": "readMetricsValues", | |
"chartType": "ChartLine", | |
"unit": "MB", | |
"aggregation": "/", | |
"aggregationNum": "1048576" | |
}, | |
{ | |
"width": 3, | |
"title": "CLR Thread (.NET Service)", | |
"height": "250", | |
"entityType": "ServiceInstance", | |
"independentSelector": false, | |
"metricType": "REGULAR_VALUE", | |
"queryMetricType": "readMetricsValues", | |
"chartType": "ChartLine", | |
"metricName": "instance_clr_available_completion_port_threads,instance_clr_available_worker_threads,instance_clr_max_completion_port_threads,instance_clr_max_worker_threads" | |
} | |
] | |
} |
结果校验
可以看到导入的界面中,已经有 Class 相关指标了
代码贡献
- Add some new thread metric and class metric to JVMMetric #7230
- add some new thread metric and class metric to JVMMetric #52
- Remove Terminated State and New State in JVMMetric (#7230) #53
- Add some new thread metric and class metric to JVMMetric (#7230) #7243
参考文档
- Observability Analysis Language
- Source and Scope extension for new metrics
- Java ManagementFactory 解析
分享并记录所学所见