Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 30 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,49 @@
[![License](https://img.shields.io/badge/LICENSE-Apache2.0-ff69b4.svg)](http://www.apache.org/licenses/LICENSE-2.0.html)


distributed monitoring system
​&nbsp;​&nbsp;​&nbsp;​&nbsp;​&nbsp;​&nbsp;OWL 是由国内领先的第三方数据智能服务商 [TalkingData](<https://www.talkingdata.com/>) 开源的一款企业级分布式监控告警系统,目前由 Tech Operation Team 持续开发更新维护。

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;OWL 后台组件全部使用 [Go](https://golang.org/) 语言开发,Go 语言是 Google 开发的一种静态强类型、编译型、并发型,并具有垃圾回收功能的编程语言,它的并发机制可以充分利用多核,同平台一次编译可以到处运行,运维成本极低,更多的信息可以参考[官方文档](https://golang.org/doc/)。前端页面使用 [iView](<https://github.com/iview/iview>) 开发,iView 同样是由 TalkingData 开源的一套基于 Vue.js 的 UI 组件库,主要服务于 PC 界面的中后台产品。

OWL是TalkingData公司推出的一款开源分布式监控系统

## Features

- Go语言开发,部署维护简单
- 分布式,支持多机房
- 多维的数据模型,类opentsdb
- 支持多种报警算法,支持多条件组合、时间范围、报警模板等
- 灵活的插件机制,支持任意语言编写,支持传参,自动同步
- 丰富的报警渠道,邮件、微信、短信、电话、自定义
- 原始数据永久存储,支持发送到opentsdb、kairosdb、kafka
- 自带web管理界面以及强大的自定义图表功能
- 灵活的插件机制,支持任意语言编写,支持传参,自动同步到客户端
- 丰富的报警渠道,邮件、企业微信、短信、电话以及自定义脚本
- 原始数据永久存储,支持发送到 opentsdb、kairosdb、kafka
- 自带 web 管理界面以及强大的自定义图表功能

## Architecture
![owl](./arch.png)


## Components

**agent**:安装在每台被监控机器上,用于采集监控数据

**netcollect**:通过 SNMP V2 采集网络设备的接口数据

**repeater**:接收 `agent` 发送过来的监控数据,并写入后端存储

**cfc**:维护客户端需要执行的插件列表,主机名、IP 地址更新以及采集到的指标列表

**controller**:从数据库加载告警策略,生成任务发送给 `inspector`,并且根据执行结果进行告警

**inspector**:从 `controller` 获取监控任务,根据 `tsdb` 中的数据进行计算,并将结果返回 `controller`

**api**:对外提供 HTTP REST API 接口,web 页面就是通过它来获取数据

**MySQL**:所有配置信息的持久化存储,包含主机信息,告警策略,主机组,人员等

**TSDB**:时序数据库(time series database),用于存储采集到的监控数据

**frontend**:web 管理页面,可以方便地进行系统管理维护工作


## Demo

http://54.223.127.87/
Expand Down
17 changes: 12 additions & 5 deletions api/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ func queryTimeSeriesData(c *gin.Context) {
metric := c.Query("metric")
tags := c.Query("tags")
tagMap := types.ParseTags(tags)
if groupName, exist := tagMap["host_group"]; exist {
if groupNames, exist := tagMap["host_group"]; exist {
productIDStr, ok := c.GetQuery("product_id")
if !ok {
response["code"] = http.StatusNotFound
Expand All @@ -49,14 +49,21 @@ func queryTimeSeriesData(c *gin.Context) {
return
}
delete(tagMap, "host_group")
hostSet := getHostnameTagsFromProductGroup(productID, groupName)
var hostSet []string
for _, groupName := range strings.Split(groupNames, "|") {
hostSet = append(hostSet, getHostnameTagsFromProductGroup(productID, groupName)...)
}
if len(hostSet) == 0 {
response["code"] = http.StatusBadRequest
response["message"] = groupName + " has no host"
response["message"] = "all group has no host"
return
}

tagMap["host"] = strings.Join(hostSet, "|")
hosts := strings.Join(hostSet, "|")
// 如果存在 tag host, merge
if host, ok := tagMap["host"]; ok {
hosts = hosts + "|" + host
}
tagMap["host"] = hosts
tags = Tags2String(tagMap)
}

Expand Down
5 changes: 5 additions & 0 deletions api/host_groups.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,15 @@ func listNotInProductHostGroupHosts(c *gin.Context) {
func listProductHostGroups(c *gin.Context) {
response := gin.H{"code": http.StatusOK}
defer c.JSON(http.StatusOK, response)
var username string
if c.DefaultQuery("my", "false") == "true" {
username = c.GetString("username")
}
total, hostGroups := mydb.getProductHostGroups(
c.GetInt("product_id"),
c.GetBool("paging"),
c.GetString("query"),
username,
c.GetString("order"),
c.GetInt("offset"),
c.GetInt("limit"),
Expand Down
9 changes: 7 additions & 2 deletions api/mysql.go
Original file line number Diff line number Diff line change
Expand Up @@ -1551,20 +1551,25 @@ func (d *db) removeHostsFromProduct(productID int, ids []string) (err error) {
}

//获取产品线下的主机组
func (d *db) getProductHostGroups(productID int, paging bool, query string, order string, offset, limit int) (int, []WarpHostGroup) {
func (d *db) getProductHostGroups(productID int, paging bool, query string, user string, order string, offset, limit int) (int, []WarpHostGroup) {
var (
groups = make([]WarpHostGroup, 0)
err error
cnt int
rawSQL string
)
rawSQL := fmt.Sprintf("select hg.id, hg.name, hg.description, hg.creator, DATE_FORMAT(hg.create_at,'%s') as create_at,"+
rawSQL = fmt.Sprintf("select hg.id, hg.name, hg.description, hg.creator, DATE_FORMAT(hg.create_at,'%s') as create_at,"+
"DATE_FORMAT(hg.update_at,'%s') as update_at, count(distinct host_group_plugin.id) as plugin_cnt, "+
"count(distinct host_group_host.id) as host_cnt, count(distinct strategy_group.id) as strategy_cnt "+
" from host_group as hg left join host_group_plugin on hg.id = host_group_plugin.group_id left join host_group_host "+
" on hg.id = host_group_host.host_group_id left join strategy_group on hg.id=strategy_group.group_id "+
" where hg.product_id=%d",
dbDateFormat, dbDateFormat, productID)
cntSQL := fmt.Sprintf("select count(*) from host_group where product_id = %d", productID)
if len(user) > 0 {
rawSQL = fmt.Sprintf("%s and hg.creator='%s'", rawSQL, user)
cntSQL = fmt.Sprintf("%s and creator='%s'", cntSQL, user)
}
if len(query) > 0 {
rawSQL = fmt.Sprintf("%s and hg.name like '%%%s%%'", rawSQL, query)
cntSQL = fmt.Sprintf("%s and name like '%%%s%%'", cntSQL, query)
Expand Down