watchdog.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554
  1. /******************************************************************************
  2. 版权所有:
  3. 文件名称: watchdog.c
  4. 文件版本: 01.00
  5. 创建作者: sunxi
  6. 创建日期: 2022-05-19
  7. 功能说明:
  8. watchdog 的使用说明:
  9. 1.本看门狗只侦察本进程中的各个线程的运行情况;
  10. 2.如何使用,请参考watchdog_test();
  11. 3. 在阻塞型的线程中,不能加入watchdog;
  12. 4. 本狗为应用狗;
  13. 其它说明:
  14. 修改记录:
  15. */
  16. /*------------------------------- 头文件 --------------------------------------
  17. */
  18. #include <stdio.h>
  19. #include <unistd.h>
  20. #include <string.h>
  21. #include <stdlib.h>
  22. #include <errno.h>
  23. #include <signal.h>
  24. #include <pthread.h>
  25. #include <time.h>
  26. #include <dirent.h>
  27. #include <signal.h>
  28. #include <sys/prctl.h>
  29. #include "../include/bspconfig.h"
  30. #include "rt.h"
  31. #include "watchdog.h"
  32. #include "shm_comm_packet.h"
  33. #include "shm.h"
  34. // #define WD_PATH "/tmp/wdt_record"
  35. #define WD_PATH "/app/data/wdt_record" // noted by sunxi: 保存在flash中,重启后,不会掉失
  36. #define WDT_MAX_ITEMS 64
  37. #define wd_info_str ""
  38. // 心跳结构体
  39. typedef struct HB_T
  40. {
  41. uint16_t A;
  42. uint16_t B;
  43. uint16_t heartbeat_A; // 每秒加1
  44. uint16_t heartbeat_B; // 与heartbeat_A相同
  45. uint16_t crc;
  46. } HB_T;
  47. // 裸核程序空间写完标志结构体
  48. typedef struct SHM_E907_PRO_W_F_T
  49. {
  50. uint16_t A;
  51. uint16_t B;
  52. uint16_t finish_flag; // 裸核程序空间写完时,0x55写入finish_flag.
  53. uint32_t len; // 裸核程序的实际长度
  54. uint16_t crc;
  55. } SHM_E907_PRO_W_F_T;
  56. typedef struct _wdt_item
  57. {
  58. const char *name;
  59. uint8_t feed;
  60. uint32_t runing; // 计时器
  61. uint32_t period; // 秒
  62. } wdt_item_t;
  63. struct ap_watchdog_t
  64. {
  65. int (*add_item)(const char *name, uint32_t *id, uint32_t period);
  66. int (*remove_item)(uint32_t id);
  67. int (*feed)(uint32_t id);
  68. int (*set_period)(uint32_t id, uint32_t seconds);
  69. uint32_t (*get_period)(uint32_t id);
  70. int (*start)(void);
  71. int (*stop)(void);
  72. int (*is_start)(void);
  73. int (*record)(const char *name);
  74. void (*set_debug)(int set);
  75. char appName[128];
  76. int _debug;
  77. wdt_item_t items[WDT_MAX_ITEMS];
  78. uint8_t runing_flag;
  79. pthread_mutex_t mutex;
  80. };
  81. static pthread_t pthread_tid = 0;
  82. int watchdog_feed_flag = 0; // 放到心跳线程(与db进行心跳握手),以确保watchdog_thread线程运行正常
  83. struct ap_watchdog_t ap_watchdog;
  84. extern uint32_t wdt_id_mainloop;
  85. // 检查文件夹是否存在
  86. // 不存在,创建
  87. int CreateNewFolder(char *path)
  88. {
  89. DIR *dir;
  90. if (path == NULL)
  91. {
  92. return -1;
  93. }
  94. // 检查文件夹是否存在
  95. dir = opendir(path);
  96. if (NULL == dir)
  97. {
  98. // 不存在,创建
  99. mkdir(path, 0775);
  100. }
  101. else
  102. {
  103. closedir(dir);
  104. }
  105. return 0;
  106. }
  107. int watchdog_init(void)
  108. {
  109. int i;
  110. CreateNewFolder(WD_PATH);
  111. memset(ap_watchdog.items, 0, sizeof(ap_watchdog.items));
  112. for (i = 0; i < WDT_MAX_ITEMS; i++)
  113. ap_watchdog.items[i].feed = 1;
  114. ap_watchdog.runing_flag = 0;
  115. ap_watchdog._debug = 0;
  116. strcpy(ap_watchdog.appName, "dtu_t536");
  117. pthread_mutex_init(&ap_watchdog.mutex, NULL);
  118. ap_watchdog.start();
  119. return 0;
  120. }
  121. int watchdog_exit(void)
  122. {
  123. ap_watchdog.stop();
  124. pthread_join(pthread_tid, NULL);
  125. return 0;
  126. }
  127. // period:周期,单位为秒
  128. static int add_item(const char *name, uint32_t *id, uint32_t period)
  129. {
  130. int i;
  131. if (name == NULL)
  132. return -1;
  133. int ret = -1;
  134. // try to get mutex
  135. struct timespec tout;
  136. clock_gettime(CLOCK_REALTIME, &tout);
  137. tout.tv_sec += 5; // 5s default
  138. if (pthread_mutex_timedlock(&ap_watchdog.mutex, &tout) != 0)
  139. return -1;
  140. for (i = 0; i < WDT_MAX_ITEMS; i++)
  141. {
  142. if (ap_watchdog.items[i].name == NULL)
  143. {
  144. ap_watchdog.items[i].feed = 1;
  145. ap_watchdog.items[i].runing = 0;
  146. ap_watchdog.items[i].period = period;
  147. *id = i;
  148. ap_watchdog.items[i].name = name;
  149. rt_printf("WDT add_item i = %d,name = %s \r\n", i, ap_watchdog.items[i].name);
  150. ret = 0;
  151. break;
  152. }
  153. }
  154. pthread_mutex_unlock(&ap_watchdog.mutex);
  155. return ret;
  156. }
  157. static int remove_item(uint32_t id)
  158. {
  159. if (id > WDT_MAX_ITEMS)
  160. return -1;
  161. // try to get mutex
  162. struct timespec tout;
  163. clock_gettime(CLOCK_REALTIME, &tout);
  164. tout.tv_sec += 5; // 5s default
  165. if (pthread_mutex_timedlock(&ap_watchdog.mutex, &tout) != 0)
  166. return -1;
  167. ap_watchdog.items[id].name = NULL;
  168. ap_watchdog.items[id].feed = 1;
  169. ap_watchdog.items[id].runing = 0;
  170. pthread_mutex_unlock(&ap_watchdog.mutex);
  171. return 0;
  172. }
  173. static int feed(uint32_t id)
  174. {
  175. if (id > WDT_MAX_ITEMS)
  176. return -1;
  177. ap_watchdog.items[id].feed = 1;
  178. return 0;
  179. }
  180. static int set_period(uint32_t id, uint32_t seconds)
  181. {
  182. if (id > WDT_MAX_ITEMS)
  183. return -1;
  184. ap_watchdog.items[id].period = seconds;
  185. return 0;
  186. }
  187. static uint32_t get_period(uint32_t id)
  188. {
  189. if (id > WDT_MAX_ITEMS)
  190. return -1;
  191. return ap_watchdog.items[id].period;
  192. }
  193. // 设置心跳
  194. static int write_freq_hb(uint8_t *data)
  195. {
  196. int ret = 0;
  197. int cnt = 0;
  198. if (data == NULL)
  199. return -1;
  200. while (1)
  201. {
  202. // ret = shm_comm_packet_write(SHM_ADDR_D_HB, data, sizeof(HB_T));
  203. if (ret > 0)
  204. {
  205. break;
  206. }
  207. if (++cnt > 3)
  208. break;
  209. usleep(30);
  210. }
  211. return ret;
  212. }
  213. static int get_e907_PRO_W_F(void)
  214. {
  215. SHM_E907_PRO_W_F_T flag;
  216. int ret = 0;
  217. int cnt = 0;
  218. while (1)
  219. {
  220. ret = shm_comm_packet_read(SHM_ADDR_U_PRO_W_F, sizeof(flag), (uint8_t *)&flag, sizeof(flag));
  221. if (ret > 0)
  222. {
  223. if (flag.finish_flag == 0x55)
  224. return flag.len;
  225. else
  226. return -1;
  227. }
  228. if (++cnt > 3)
  229. break;
  230. usleep(30);
  231. }
  232. return ret;
  233. }
  234. static void *watchdog_thread(void *arg)
  235. {
  236. int i = 0;
  237. static HB_T hb;
  238. int ret = 0;
  239. static int read_flag = 0;
  240. struct file *pfile;
  241. loff_t pos;
  242. memset(&hb, 0, sizeof(hb));
  243. pthread_detach(pthread_self());
  244. prctl(PR_SET_NAME, "watchdog_thread");
  245. while (ap_watchdog.is_start())
  246. {
  247. watchdog_feed_flag = 1;
  248. sleep(1);
  249. // 把裸核程序读出来,并写到tmp文件夹中。
  250. if (read_flag == 0)
  251. {
  252. ret = get_e907_PRO_W_F();
  253. if (ret > 100) // 返回裸核程序的实际长度,所以一定会大于100才有意义
  254. {
  255. // finish
  256. read_flag = 1;
  257. // 从shm中读取裸核程序
  258. // 创建数据文件
  259. pfile = rt_file_open("/tmp/amp_rv0.bin", O_CREAT | O_RDWR, 0);
  260. if (!IS_ERR(pfile))
  261. {
  262. pos = 0;
  263. rt_file_write(pfile, SHM_BASE_R + SHM_ADDR_U_E907_PRO, ret, &pos);
  264. rt_file_close(pfile, 0);
  265. }
  266. }
  267. }
  268. hb.heartbeat_A++;
  269. hb.heartbeat_B = hb.heartbeat_A;
  270. write_freq_hb((uint8_t *)&hb); // 给裸核的心跳
  271. for (i = 0; i < WDT_MAX_ITEMS; i++)
  272. {
  273. if (ap_watchdog.items[i].name != NULL)
  274. {
  275. if (++ap_watchdog.items[i].runing > ap_watchdog.items[i].period)
  276. {
  277. if (ap_watchdog.items[i].feed != 1)
  278. {
  279. ap_watchdog.record(ap_watchdog.items[i].name);
  280. rt_printf("WDT-------%s is timeout!!!\r\n", ap_watchdog.items[i].name);
  281. system("reboot -f");
  282. // exit(EXIT_FAILURE);//TODO. noted by sunxi: 这里需要确认是,是重启应用程序,还是系统reboot!!!
  283. }
  284. else
  285. {
  286. if (ap_watchdog._debug)
  287. {
  288. rt_printf("WDT-------%s is Runing.\r\n", ap_watchdog.items[i].name);
  289. }
  290. }
  291. ap_watchdog.items[i].feed = 0;
  292. ap_watchdog.items[i].runing = 0;
  293. }
  294. }
  295. }
  296. }
  297. return NULL;
  298. }
  299. static int start(void)
  300. {
  301. int ret;
  302. if (ap_watchdog.runing_flag == 1)
  303. {
  304. return 0;
  305. }
  306. ap_watchdog._debug = 0;
  307. ap_watchdog.runing_flag = 1;
  308. ret = pthread_create(&pthread_tid, NULL, watchdog_thread, NULL);
  309. if (ret != 0)
  310. {
  311. rt_printf("can not create thread any more: %d\r\n", errno);
  312. ap_watchdog.runing_flag = 0;
  313. return -1;
  314. }
  315. return 0;
  316. }
  317. static int stop(void)
  318. {
  319. ap_watchdog.runing_flag = 0;
  320. return 0;
  321. }
  322. static int is_start(void)
  323. {
  324. return ap_watchdog.runing_flag;
  325. }
  326. static int record(const char *name)
  327. {
  328. time_t tt;
  329. char buf[512];
  330. FILE *f = NULL;
  331. char rec[256];
  332. char tmpBuf[128];
  333. int size;
  334. if (name == NULL)
  335. return -1;
  336. tt = time(NULL);
  337. memset(buf, 0, sizeof(buf));
  338. sprintf(buf, "%s/wdt_record_%s", WD_PATH, ap_watchdog.appName);
  339. f = fopen(buf, "a");
  340. if (f == NULL)
  341. {
  342. perror("fopen");
  343. rt_printf("fopen is error!!!\r\n");
  344. return -1;
  345. }
  346. memset(tmpBuf, 0, sizeof(tmpBuf));
  347. strftime(tmpBuf, 128, "%Y-%m-%d %H:%M:%S", localtime(&tt));
  348. memset(rec, 0, sizeof(rec));
  349. size = snprintf(rec, sizeof(rec), "%s %s fuck you up; \r\n", tmpBuf, name);
  350. fwrite(rec, size, 1, f);
  351. fclose(f);
  352. return 0;
  353. }
  354. static void set_debug(int set)
  355. {
  356. ap_watchdog._debug = set;
  357. }
  358. struct ap_watchdog_t ap_watchdog =
  359. {
  360. .add_item = add_item,
  361. .remove_item = remove_item,
  362. .feed = feed,
  363. .set_period = set_period,
  364. .get_period = get_period,
  365. .start = start,
  366. .stop = stop,
  367. .is_start = is_start,
  368. .record = record,
  369. .set_debug = set_debug,
  370. };
  371. /*
  372. period: 看门狗的喂狗周期
  373. 因为可能存在某些线程,其运行过程很长,运行的中间,没有合里加sleep等休息函数,
  374. 造成占用时间较长。
  375. 于是,这里的period,可以尽量设置长一点的时间。
  376. */
  377. int watchdog_add_item(const char *name, uint32_t *id, uint32_t period)
  378. {
  379. int ret = 0;
  380. ret = ap_watchdog.add_item(name, id, period);
  381. return ret;
  382. }
  383. int watchdog_feed(uint32_t id)
  384. {
  385. int ret = 0;
  386. ret = ap_watchdog.feed(id);
  387. return ret;
  388. }
  389. int watchdog_remove_item(uint32_t id)
  390. {
  391. int ret = 0;
  392. ret = ap_watchdog.remove_item(id);
  393. return ret;
  394. }
  395. //===================================================================================================================
  396. // test
  397. void *watchdog_test_pthread(void *arg)
  398. {
  399. pthread_detach(pthread_self());
  400. uint32_t wdt_id;
  401. int rc = watchdog_add_item("watchdog_test_pthread", &wdt_id, 10);
  402. if (rc != 0)
  403. {
  404. printf("can not add watchdog_test_pthread task to wdt\r\n");
  405. return NULL;
  406. }
  407. while (1)
  408. {
  409. // 把喂狗操作放在首行,确保每个循环都有喂狗
  410. watchdog_feed(wdt_id); // 去掉本行喂狗,就可以产生记录
  411. sleep(1);
  412. }
  413. watchdog_remove_item(wdt_id);
  414. return NULL;
  415. }
  416. int watchdog_test(void)
  417. {
  418. int ret;
  419. watchdog_init();
  420. pthread_t ConnectCheck_id;
  421. ret = pthread_create(&ConnectCheck_id, NULL, watchdog_test_pthread, NULL);
  422. if (ret != 0)
  423. {
  424. printf("can not create watchdog_test_pthread\r\n");
  425. return -1;
  426. }
  427. return 0;
  428. }
  429. void watchdog_feed_mainloop(void)
  430. {
  431. watchdog_feed(wdt_id_mainloop);
  432. }
  433. void watchdog_feed_protect(void)
  434. {
  435. }
  436. void watchdog_reset_cpu(int i)
  437. {
  438. printf("reboot code: %d .\n", i);
  439. system("reboot -f");
  440. }
  441. void watchdog_feed_mainloop_50s(void)
  442. {
  443. watchdog_feed(wdt_id_mainloop);
  444. }
  445. /*------------------------------- 文件结束 -------------------------------*/