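// This program offloads members of per-project Redis sets to on-disk queues.
// For every project listed in the "trackers" hash it watches the project's work
// sets and, whenever a set grows past a high watermark, spills members to a
// go-diskqueue on disk; when a set drops below a low watermark, members are fed
// back from disk into Redis.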
package main

import (
	"context"
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	_ "net/http/pprof"
	"os"
	"os/signal"
	"strings"
	"syscall"
	"time"

	"github.com/go-redis/redis/v8"
	dq "github.com/nsqio/go-diskqueue"
)

const (
	DefaultWatermarkHigh int64 = 100000
	DefaultWatermarkLow  int64 = 100000
	DefaultBatchSize     int64 = 10000
)

func l(_ dq.LogLevel, f string, args ...interface{}) {
	log.Printf(f, args...)
}
type ProjectRedisConfig struct {
	Host string `json:"host"`
	Pass string `json:"pass"`
	Port int    `json:"port"`
}

type ProjectOffloadConfig struct {
	WatermarkHigh   int64 `json:"high"`
	WatermarkMiddle int64 `json:"middle"`
	WatermarkLow    int64 `json:"low"`
	BatchSize       int64 `json:"batchsize"`
}

type ProjectConfig struct {
	RedisConfig   *ProjectRedisConfig  `json:"redis,omitempty"`
	OffloadConfig ProjectOffloadConfig `json:"offload"`
}
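
// Offloader moves members of one project's Redis sets between Redis and the
// project's on-disk queues.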
type Offloader struct {
	RedisClient   *redis.Client
	ProjectConfig ProjectConfig
	OffloadConfig ProjectOffloadConfig
	Context       context.Context
	Cancel        context.CancelFunc
	Done          chan bool
	Queues        map[string]dq.Interface
	Sets          map[string]string
	Name          string
}

func (that *Offloader) CleanName(s string) string {
	return strings.ReplaceAll(strings.ReplaceAll(s, "/", "_"), "\x00", "_")
}
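
// RedisConfigDiffers reports whether the given Redis connection settings differ
// from the ones this offloader was started with.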
func (that *Offloader) RedisConfigDiffers(newConfig *ProjectRedisConfig) bool {
	if that.ProjectConfig.RedisConfig == nil && newConfig == nil {
		return false
	}
	if that.ProjectConfig.RedisConfig == nil || newConfig == nil || that.ProjectConfig.RedisConfig.Host != newConfig.Host || that.ProjectConfig.RedisConfig.Port != newConfig.Port || that.ProjectConfig.RedisConfig.Pass != newConfig.Pass {
		return true
	}
	return false
}

func (that *Offloader) OffloadConfigDiffers(newConfig ProjectOffloadConfig) bool {
	return that.OffloadConfig.WatermarkHigh != newConfig.WatermarkHigh || that.OffloadConfig.WatermarkMiddle != newConfig.WatermarkMiddle || that.OffloadConfig.WatermarkLow != newConfig.WatermarkLow || that.OffloadConfig.BatchSize != newConfig.BatchSize
}
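
// RefreshQueues reads the project's priorities and filters from Redis, opens a
// disk queue for every set that may need offloading, and closes queues that are
// no longer needed.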
func (that *Offloader) RefreshQueues() {
	pipe := that.RedisClient.Pipeline()
	prioritiesCmdRes := pipe.ZRange(that.Context, fmt.Sprintf("%s:priorities", that.Name), 0, -1)
	filtersCmdRes := pipe.SMembers(that.Context, fmt.Sprintf("%s:filters", that.Name))
	_, err := pipe.Exec(that.Context)
	if err != nil {
		log.Printf("unable to refresh queues for offloader %s: %s", that.Name, err)
		return
	}
	priorities, err := prioritiesCmdRes.Result()
	if err != nil {
		log.Printf("unable to refresh queues for offloader %s: %s", that.Name, err)
		return
	}
	filters, err := filtersCmdRes.Result()
	if err != nil {
		log.Printf("unable to refresh queues for offloader %s: %s", that.Name, err)
		return
	}
	setQueueMap := map[string]string{
		"todo":           "todo",
		"todo:secondary": "todo:secondary",
		"todo:redo":      "todo:redo",
		"done":           "done",
		"unretrievable":  "unretrievable",
	}
	for _, filter := range filters {
		setQueueMap[fmt.Sprintf("filtered:%s", filter)] = "filtered"
	}
	for _, priority := range priorities {
		setQueueMap[fmt.Sprintf("todo:prio:%s", priority)] = fmt.Sprintf("todo:prio:%s", priority)
	}
	needQueueMap := map[string]bool{}
	for setName, queueName := range setQueueMap {
		needQueueMap[queueName] = true
		if _, has := that.Queues[queueName]; !has {
			log.Printf("opening queue %s for %s:%s", queueName, that.Name, setName)
			// 128 MiB per queue file, sync to disk every 1,000,000 writes or 5 seconds.
			that.Queues[queueName] = dq.New(fmt.Sprintf("%s:%s", that.Name, that.CleanName(queueName)), dataDir, 128*1024*1024, 0, 128*1024*1024, 1_000_000, 5*time.Second, l)
		}
		that.Sets[setName] = queueName
	}
	// Drop mappings for sets that no longer exist so the main loop never touches a
	// queue that is about to be closed below.
	for setName := range that.Sets {
		if _, has := setQueueMap[setName]; !has {
			delete(that.Sets, setName)
		}
	}
	for k, v := range that.Queues {
		if _, has := needQueueMap[k]; !has {
			v.Close()
			delete(that.Queues, k)
		}
	}
}
func (that *Offloader) CloseQueues() {
	for k, q := range that.Queues {
		log.Printf("closing queue %s for %s", k, that.Name)
		q.Close()
	}
}
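
// UpdateStats publishes the current depth of each disk queue to the
// <project>:offloaded hash.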
func (that *Offloader) UpdateStats() {
	hset := map[string]interface{}{}
	for k, q := range that.Sets {
		if k != q {
			continue
		}
		hset[k] = fmt.Sprintf("%d", that.Queues[q].Depth())
	}
	_, err := that.RedisClient.HSet(that.Context, fmt.Sprintf("%s:offloaded", that.Name), hset).Result()
	if err != nil {
		log.Printf("unable to hmset %s:offloaded: %s", that.Name, err)
	}
}
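
// Do is the offloader's main loop: it periodically compares each tracked set's
// cardinality against the watermarks, spilling members to disk above the high
// watermark and restoring them from disk below the low watermark.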
func (that *Offloader) Do() {
	defer close(that.Done)
	defer that.Cancel()
	if that.ProjectConfig.RedisConfig != nil {
		defer that.RedisClient.Close()
	}
	that.Sets = map[string]string{}
	that.Queues = map[string]dq.Interface{}
	defer that.CloseQueues()
	ticker := time.NewTicker(1 * time.Second)
	defer ticker.Stop()
	refreshTicker := time.NewTicker(5 * time.Minute)
	defer refreshTicker.Stop()
	that.RefreshQueues()
	that.UpdateStats()
	skipSleepChan := make(chan bool, 1)
	defer close(skipSleepChan)
	watermarkHigh := that.OffloadConfig.WatermarkHigh
	if watermarkHigh == 0 {
		watermarkHigh = DefaultWatermarkHigh
	}
	watermarkLow := that.OffloadConfig.WatermarkLow
	if watermarkLow == 0 {
		watermarkLow = DefaultWatermarkLow
	}
	batchSize := that.OffloadConfig.BatchSize
	if batchSize == 0 {
		batchSize = DefaultBatchSize
	}
	for {
		//for k, q := range that.Queues {
		//	key := fmt.Sprintf("%s:%s", that.Name, k)
		//	scard, err := that.RedisClient.SCard(that.Context, key).Result()
		//	if err != nil {
		//		log.Printf("unable to scard %s: %s", key, err)
		//		continue
		//	}
		//	for scard > watermarkHigh || scard < watermarkLow {
		//		select {
		//		case <-that.Context.Done():
		//			return
		//		case <-refreshTicker.C:
		//			that.RefreshQueues()
		//			that.UpdateStats()
		//		default:
		//		}
		//		if scard > watermarkHigh {
		//			spopLimit := scard - watermarkHigh
		//			if spopLimit > batchSize {
		//				spopLimit = batchSize
		//			}
		//			ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
		//			entries, err := that.RedisClient.SPopN(ctx, key, spopLimit).Result()
		//			cancel()
		//			if err != nil {
		//				log.Printf("unable to spop %s: %s", key, err)
		//			}
		//			scard = scard - int64(len(entries))
		//			for _, entry := range entries {
		//				err := q.Put([]byte(entry))
		//				if err != nil {
		//					log.Printf("unable to q.Put %s: %s", key, err)
		//					return
		//				}
		//			}
		//		} else if scard < watermarkLow {
		//			spopLimit := watermarkLow - scard
		//			if spopLimit > batchSize {
		//				spopLimit = batchSize
		//			}
		//			var entries []interface{}
		//			for q.Depth() > 0 && int64(len(entries)) < spopLimit {
		//				entry := <-q.ReadChan()
		//				entries = append(entries, string(entry))
		//			}
		//			if len(entries) == 0 {
		//				break
		//			}
		//			ctx, cancel := context.WithTimeout(context.Background(), 15*time.Minute)
		//			_, err := that.RedisClient.SAdd(ctx, key, entries...).Result()
		//			cancel()
		//			if err != nil {
		//				log.Printf("unable to sadd %s %#v: %s", key, entries, err)
		//				for _, entry := range entries {
		//					err := q.Put([]byte(entry.(string)))
		//					if err != nil {
		//						log.Printf("unable to q.Put %s: %s", key, err)
		//					}
		//				}
		//				return
		//			}
		//			scard = scard + int64(len(entries))
		//		}
		//	}
		//}
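		// Check all tracked sets with a single pipelined SCARD pass, then offload or
		// reload each set as needed.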
		scards := map[string]*redis.IntCmd{}
		pipe := that.RedisClient.Pipeline()
		for k := range that.Sets {
			key := fmt.Sprintf("%s:%s", that.Name, k)
			scards[k] = pipe.SCard(that.Context, key)
		}
		_, err := pipe.Exec(that.Context)
		if err != nil {
			log.Printf("unable to scard %s: %s", that.Name, err)
		} else {
			rerun := false
			for k, q := range that.Sets {
				key := fmt.Sprintf("%s:%s", that.Name, k)
				scard, err := scards[k].Result()
				if err != nil {
					log.Printf("unable to scard %s: %s", key, err)
					continue
				}
				if scard > watermarkHigh {
					spopLimit := scard - watermarkHigh
					if spopLimit > batchSize {
						spopLimit = batchSize
					}
					ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
					entries, err := that.RedisClient.SPopN(ctx, key, spopLimit).Result()
					cancel()
					if err != nil {
						log.Printf("unable to spop %s: %s", key, err)
					}
					if len(entries) == 0 {
						continue
					}
					for _, entry := range entries {
						err := that.Queues[q].Put([]byte(entry))
						if err != nil {
							log.Printf("unable to q.Put %s: %s", key, err)
							return
						}
					}
					rerun = true
				} else if k == q && scard < watermarkLow && that.Queues[q].Depth() > 0 {
					spopLimit := watermarkLow - scard
					if spopLimit > batchSize {
						spopLimit = batchSize
					}
					var entries []interface{}
					for that.Queues[q].Depth() > 0 && int64(len(entries)) < spopLimit {
						entry := <-that.Queues[q].ReadChan()
						entries = append(entries, string(entry))
					}
					if len(entries) == 0 {
						continue
					}
					ctx, cancel := context.WithTimeout(context.Background(), 15*time.Minute)
					_, err := that.RedisClient.SAdd(ctx, key, entries...).Result()
					cancel()
					if err != nil {
						log.Printf("unable to sadd %s: %s", key, err)
						for _, entry := range entries {
							err := that.Queues[q].Put([]byte(entry.(string)))
							if err != nil {
								log.Printf("unable to q.Put %s: %s", key, err)
							}
						}
						return
					}
					rerun = true
				}
			}
			if rerun {
				select {
				case skipSleepChan <- true:
				default:
				}
			}
			that.UpdateStats()
		}
		select {
		case <-that.Context.Done():
			return
		case <-refreshTicker.C:
			that.RefreshQueues()
			that.UpdateStats()
		case <-ticker.C:
			that.UpdateStats()
		case <-skipSleepChan:
		}
	}
}
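
// offloaders tracks the currently running offloader for each project.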
var offloaders = map[string]*Offloader{}

func StopProjects() {
	var doneChans []chan bool
	for project, offloader := range offloaders {
		log.Printf("stopping offloader %s", project)
		offloader.Cancel()
		doneChans = append(doneChans, offloader.Done)
	}
	for _, c := range doneChans {
		<-c
	}
}
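
// RefreshProjects reloads the "trackers" hash, stops offloaders whose project has
// disappeared, whose configuration changed, or which have exited on their own, and
// starts offloaders for newly added projects.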
func RefreshProjects(redisClient *redis.Client) {
	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
	res, err := redisClient.HGetAll(ctx, "trackers").Result()
	cancel()
	if err != nil {
		log.Printf("unable to refresh trackers table: %s", err)
		return
	}
	updatedProjects := map[string]ProjectConfig{}
	for project, configString := range res {
		//if project != "ua" && project != "ukr-net" && project != "ua-urls" {
		//	continue
		//}
		config := ProjectConfig{}
		err := json.Unmarshal([]byte(configString), &config)
		if err != nil {
			log.Printf("unable to decode project %s config: %s", project, err)
			continue
		}
		updatedProjects[project] = config
	}
	for project, offloader := range offloaders {
		_, stopRequired := updatedProjects[project]
		stopRequired = !stopRequired
		if !stopRequired {
			stopRequired = offloader.OffloadConfigDiffers(updatedProjects[project].OffloadConfig)
			if !stopRequired {
				stopRequired = offloader.RedisConfigDiffers(updatedProjects[project].RedisConfig)
				if !stopRequired {
					select {
					case <-offloader.Context.Done():
						stopRequired = true
					case <-offloader.Done:
						stopRequired = true
					default:
					}
				}
			}
		}
		if stopRequired {
			log.Printf("stopping offloader %s", project)
			offloader.Cancel()
			<-offloader.Done
			delete(offloaders, project)
		}
	}
	for project, config := range updatedProjects {
		if _, has := offloaders[project]; !has {
			log.Printf("starting offloader %s", project)
			offloader := &Offloader{}
			offloader.Name = project
			offloader.ProjectConfig = config
			// Copy the offload settings so Do and OffloadConfigDiffers see the configured
			// watermarks instead of the zero value.
			offloader.OffloadConfig = config.OffloadConfig
			if config.RedisConfig != nil {
				offloader.RedisClient = redis.NewClient(&redis.Options{
					Addr:        fmt.Sprintf("%s:%d", config.RedisConfig.Host, config.RedisConfig.Port),
					Username:    "default",
					Password:    config.RedisConfig.Pass,
					ReadTimeout: 15 * time.Minute,
				})
			} else {
				offloader.RedisClient = redisClient
			}
			offloader.Context, offloader.Cancel = context.WithCancel(context.Background())
			offloader.Done = make(chan bool)
			offloaders[project] = offloader
			go offloader.Do()
		}
	}
}
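
// dataDir is the base directory for the on-disk queues, taken from DATA_DIR.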
var dataDir string

func main() {
	log.SetFlags(log.Flags() | log.Lshortfile)
	go func() {
		if err := http.ListenAndServe("127.0.0.1:16992", nil); err != nil {
			log.Printf("webserver error: %s", err)
		}
	}()
	dataDir = os.Getenv("DATA_DIR")
	if dataDir == "" {
		log.Panicf("no DATA_DIR specified")
	}
	mainOptions, err := redis.ParseURL(os.Getenv("REDIS_URL"))
	if err != nil {
		log.Panicf("%s", err)
	}
	mainOptions.ReadTimeout = 15 * time.Minute
	mainClient := redis.NewClient(mainOptions)
	sc := make(chan os.Signal, 1)
	signal.Notify(sc, syscall.SIGINT, syscall.SIGTERM, os.Interrupt, os.Kill)
	ticker := time.NewTicker(1 * time.Minute)
	for {
		RefreshProjects(mainClient)
		select {
		case <-sc:
			StopProjects()
			return
		case <-ticker.C:
		}
	}
}