watchdog.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587
  1. /******************************************************************************
  2. 版权所有:
  3. 文件名称: watchdog.c
  4. 文件版本: 01.00
  5. 创建作者: sunxi
  6. 创建日期: 2022-05-19
  7. 功能说明:
  8. watchdog 的使用说明:
  9. 1.本看门狗只侦察本进程中的各个线程的运行情况;
  10. 2.如何使用,请参考watchdog_test();
  11. 3. 在阻塞型的线程中,不能加入watchdog;
  12. 4. 本狗为应用狗;
  13. 其它说明:
  14. 修改记录:
  15. */
  16. /*------------------------------- 头文件 --------------------------------------
  17. */
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <errno.h>
  #include <signal.h>
  #include <time.h>
  #include <dirent.h>
  #include <pthread.h>
  #include <unistd.h>
  #include <sys/prctl.h>
  #include <sys/stat.h>
  #include <sys/types.h>
  #include "../include/bspconfig.h"
  #include "rt.h"
  #include "watchdog.h"
  #include "shm_comm_packet.h"
  #include "shm.h"
  /*
   * Directory that receives the watchdog timeout records.
   * The original author's note says it lives on flash so that the records
   * are not lost across a reboot; "/tmp/wdt_record" is the alternative
   * location that was left commented out.
   */
  #define WD_PATH       "/app/data/wdt_record"

  /* Number of slots in the watchdog item table. */
  #define WDT_MAX_ITEMS 64

  /* Empty string; not referenced in the visible part of this file. */
  #define wd_info_str   ""
  /*
   * Heartbeat record.
   * In this file the record is zeroed once, then heartbeat_A/heartbeat_B are
   * advanced on every pass of the watchdog thread and the record is written
   * to shared memory. A, B and crc are never assigned here (they stay 0).
   */
  typedef struct HB_T
  {
      uint16_t A;
      uint16_t B;
      uint16_t heartbeat_A; /* incremented by 1 every second */
      uint16_t heartbeat_B; /* always the same value as heartbeat_A */
      uint16_t crc;
  } HB_T;
  /*
   * "Bare-core program area fully written" marker, read from shared memory.
   * NOTE(review): the compiler inserts padding between finish_flag and len on
   * common ABIs; presumably the writer on the other side uses the same
   * layout - confirm.
   */
  typedef struct SHM_E907_PRO_W_F_T
  {
      uint16_t A;
      uint16_t B;
      uint16_t finish_flag; /* 0x55 is stored here once the program area is completely written */
      uint32_t len;         /* actual length of the bare-core program */
      uint16_t crc;
  } SHM_E907_PRO_W_F_T;
  /* One slot of the watchdog table; a slot is in use while name is non-NULL. */
  typedef struct _wdt_item {
      const char *name;   /* caller-owned label; the pointer is stored, not copied */
      uint8_t     feed;   /* set to 1 by feed(), cleared by the watchdog thread */
      uint32_t    runing; /* seconds counted since the last check of this slot */
      uint32_t    period; /* check period, in seconds */
  } wdt_item_t;
  62. struct ap_watchdog_t
  63. {
  64. int (*add_item)(const char *name, uint32_t *id, uint32_t period);
  65. int (*remove_item)(uint32_t id);
  66. int (*feed)(uint32_t id);
  67. int (*set_period)(uint32_t id, uint32_t seconds);
  68. uint32_t (*get_period)(uint32_t id);
  69. int (*start)(void);
  70. int (*stop)(void);
  71. int (*is_start)(void);
  72. int (*record)(const char *name);
  73. void (*set_debug)(int set);
  74. char appName[128];
  75. int _debug;
  76. wdt_item_t items[WDT_MAX_ITEMS];
  77. uint8_t runing_flag;
  78. pthread_mutex_t mutex;
  79. };
  80. static pthread_t pthread_tid = 0;
  81. int watchdog_feed_flag = 0;//放到心跳线程(与db进行心跳握手),以确保watchdog_thread线程运行正常
  82. struct ap_watchdog_t ap_watchdog;
  83. extern uint32_t wdt_id_mainloop;
  /*
   * Make sure the directory `path` exists, creating it (mode 0775) if needed.
   *
   * Only the last path component is created; parent directories must already
   * exist.
   *
   * Returns 0 when the directory exists on return, -1 when `path` is NULL,
   * when the directory cannot be created, or when `path` exists but is not a
   * directory.
   */
  int CreateNewFolder(char* path)
  {
      struct stat st;

      if (path == NULL)
      {
          return -1;
      }

      /* Try to create first: this avoids a check-then-create race. */
      if (mkdir(path, 0775) == 0)
      {
          return 0;
      }

      /* An already existing directory is the normal, successful case. */
      if (errno == EEXIST && stat(path, &st) == 0 && S_ISDIR(st.st_mode))
      {
          return 0;
      }

      return -1;
  }
  106. int watchdog_init(void)
  107. {
  108. int i;
  109. CreateNewFolder(WD_PATH);
  110. memset(ap_watchdog.items, 0, sizeof(ap_watchdog.items));
  111. for(i=0; i<WDT_MAX_ITEMS; i++)
  112. ap_watchdog.items[i].feed = 1;
  113. ap_watchdog.runing_flag = 0;
  114. ap_watchdog._debug = 0;
  115. strcpy(ap_watchdog.appName, "dtu_t536");
  116. pthread_mutex_init(&ap_watchdog.mutex, NULL);
  117. ap_watchdog.start();
  118. return 0;
  119. }
  120. int watchdog_exit(void)
  121. {
  122. ap_watchdog.stop();
  123. pthread_join(pthread_tid, NULL);
  124. return 0;
  125. }
  126. //period:周期,单位为秒
  127. static int add_item(const char *name, uint32_t *id, uint32_t period)
  128. {
  129. int i;
  130. if(name == NULL)
  131. return -1;
  132. int ret = -1;
  133. // try to get mutex
  134. struct timespec tout;
  135. clock_gettime(CLOCK_REALTIME, &tout);
  136. tout.tv_sec += 5; //5s default
  137. if(pthread_mutex_timedlock(&ap_watchdog.mutex, &tout) != 0)
  138. return -1;
  139. for(i=0; i<WDT_MAX_ITEMS; i++)
  140. {
  141. if(ap_watchdog.items[i].name == NULL)
  142. {
  143. ap_watchdog.items[i].feed = 1;
  144. ap_watchdog.items[i].runing = 0;
  145. ap_watchdog.items[i].period = period;
  146. *id = i;
  147. ap_watchdog.items[i].name = name;
  148. rt_printf("WDT add_item i=%d,name= %s \r\n",i, ap_watchdog.items[i].name);
  149. ret = 0;
  150. break;
  151. }
  152. }
  153. pthread_mutex_unlock(&ap_watchdog.mutex);
  154. return ret;
  155. }
  156. static int remove_item(uint32_t id)
  157. {
  158. if(id > WDT_MAX_ITEMS)
  159. return -1;
  160. // try to get mutex
  161. struct timespec tout;
  162. clock_gettime(CLOCK_REALTIME, &tout);
  163. tout.tv_sec += 5; //5s default
  164. if(pthread_mutex_timedlock(&ap_watchdog.mutex, &tout) != 0)
  165. return -1;
  166. ap_watchdog.items[id].name = NULL;
  167. ap_watchdog.items[id].feed = 1;
  168. ap_watchdog.items[id].runing = 0;
  169. pthread_mutex_unlock(&ap_watchdog.mutex);
  170. return 0;
  171. }
  172. static int feed(uint32_t id)
  173. {
  174. if(id > WDT_MAX_ITEMS)
  175. return -1;
  176. ap_watchdog.items[id].feed = 1;
  177. return 0;
  178. }
  179. static int set_period(uint32_t id, uint32_t seconds)
  180. {
  181. if(id > WDT_MAX_ITEMS)
  182. return -1;
  183. ap_watchdog.items[id].period = seconds;
  184. return 0;
  185. }
  186. static uint32_t get_period(uint32_t id)
  187. {
  188. if(id > WDT_MAX_ITEMS)
  189. return -1;
  190. return ap_watchdog.items[id].period;
  191. }
  192. //设置心跳
  193. static int write_freq_hb(uint8_t *data)
  194. {
  195. int ret = 0;
  196. int cnt = 0;
  197. if(data == NULL)
  198. return -1;
  199. while(1)
  200. {
  201. ret = shm_comm_packet_write(SHM_ADDR_D_HB, data, sizeof(HB_T));
  202. if(ret > 0)
  203. {
  204. break;
  205. }
  206. if(++cnt > 3)
  207. break;
  208. usleep(30);
  209. }
  210. return ret;
  211. }
  212. static int get_e907_PRO_W_F(void)
  213. {
  214. SHM_E907_PRO_W_F_T flag;
  215. int ret = 0;
  216. int cnt = 0;
  217. while(1)
  218. {
  219. ret = shm_comm_packet_read(SHM_ADDR_U_PRO_W_F, sizeof(flag), (uint8_t *)&flag, sizeof(flag));
  220. if(ret > 0)
  221. {
  222. if(flag.finish_flag == 0x55)
  223. return flag.len;
  224. else
  225. return -1;
  226. }
  227. if(++cnt > 3)
  228. break;
  229. usleep(30);
  230. }
  231. return ret;
  232. }
  233. static void *watchdog_thread(void *arg)
  234. {
  235. int i = 0;
  236. static HB_T hb;
  237. int ret = 0;
  238. static int read_flag = 0;
  239. struct file * pfile;
  240. loff_t pos;
  241. memset(&hb,0,sizeof(hb));
  242. pthread_detach(pthread_self());
  243. prctl(PR_SET_NAME, "watchdog_thread");
  244. while (ap_watchdog.is_start()) {
  245. watchdog_feed_flag = 1;
  246. sleep(1);
  247. //把裸核程序读出来,并写到tmp文件夹中。
  248. if(read_flag == 0)
  249. {
  250. ret = get_e907_PRO_W_F();
  251. if(ret > 100)//返回裸核程序的实际长度,所以一定会大于100才有意义
  252. {
  253. //finish
  254. read_flag = 1;
  255. //从shm中读取裸核程序
  256. // 创建数据文件
  257. pfile = rt_file_open("/tmp/amp_rv0.bin",O_CREAT|O_RDWR,0);
  258. if(!IS_ERR(pfile))
  259. {
  260. pos = 0;
  261. rt_file_write(pfile, SHM_BASE_R+SHM_ADDR_U_E907_PRO, ret, &pos);
  262. rt_file_close(pfile,0);
  263. }
  264. }
  265. }
  266. hb.heartbeat_A++;
  267. hb.heartbeat_B = hb.heartbeat_A;
  268. write_freq_hb((uint8_t *)&hb); //给裸核的心跳
  269. for(i=0; i<WDT_MAX_ITEMS; i++)
  270. {
  271. if(ap_watchdog.items[i].name != NULL)
  272. {
  273. if(++ap_watchdog.items[i].runing > ap_watchdog.items[i].period)
  274. {
  275. if(ap_watchdog.items[i].feed != 1)
  276. {
  277. ap_watchdog.record(ap_watchdog.items[i].name);
  278. rt_printf("WDT-------%s is timeout!!!\r\n",ap_watchdog.items[i].name);
  279. system("reboot -f");
  280. //exit(EXIT_FAILURE);//TODO. noted by sunxi: 这里需要确认是,是重启应用程序,还是系统reboot!!!
  281. }
  282. else
  283. {
  284. if(ap_watchdog._debug)
  285. {
  286. rt_printf("WDT-------%s is Runing.\r\n",ap_watchdog.items[i].name);
  287. }
  288. }
  289. ap_watchdog.items[i].feed = 0;
  290. ap_watchdog.items[i].runing = 0;
  291. }
  292. }
  293. }
  294. }
  295. return NULL;
  296. }
  297. static int start(void)
  298. {
  299. int ret;
  300. if(ap_watchdog.runing_flag == 1)
  301. {
  302. return 0;
  303. }
  304. ap_watchdog._debug = 0;
  305. ap_watchdog.runing_flag = 1;
  306. ret = pthread_create(&pthread_tid, NULL, watchdog_thread, NULL);
  307. if(ret != 0) {
  308. rt_printf("can not create thread any more: %d\r\n",errno);
  309. ap_watchdog.runing_flag = 0;
  310. return -1;
  311. }
  312. return 0;
  313. }
  314. static int stop(void)
  315. {
  316. ap_watchdog.runing_flag = 0;
  317. return 0;
  318. }
  319. static int is_start(void)
  320. {
  321. return ap_watchdog.runing_flag;
  322. }
  323. static int record(const char *name)
  324. {
  325. time_t tt;
  326. char buf[512];
  327. FILE * f = NULL;
  328. char rec[256];
  329. char tmpBuf[128];
  330. int size;
  331. if(name == NULL)
  332. return -1;
  333. tt = time(NULL);
  334. memset(buf,0,sizeof(buf));
  335. sprintf(buf, "%s/wdt_record_%s", WD_PATH,ap_watchdog.appName);
  336. f = fopen(buf, "a");
  337. if(f == NULL) {
  338. perror("fopen");
  339. rt_printf("fopen is error!!!\r\n");
  340. return -1;
  341. }
  342. memset(tmpBuf, 0, sizeof(tmpBuf));
  343. strftime(tmpBuf, 128, "%Y-%m-%d %H:%M:%S", localtime(&tt));
  344. memset(rec, 0, sizeof(rec));
  345. size = snprintf(rec, sizeof(rec), "%s %s fuck you up; \r\n", tmpBuf, name);
  346. fwrite(rec, size, 1, f);
  347. fclose(f);
  348. return 0;
  349. }
  350. static void set_debug(int set)
  351. {
  352. ap_watchdog._debug = set;
  353. }
  354. struct ap_watchdog_t ap_watchdog =
  355. {
  356. .add_item = add_item,
  357. .remove_item = remove_item,
  358. .feed = feed,
  359. .set_period = set_period,
  360. .get_period = get_period,
  361. .start = start,
  362. .stop = stop,
  363. .is_start = is_start,
  364. .record = record,
  365. .set_debug = set_debug,
  366. };
  367. /*
  368. period: 看门狗的喂狗周期
  369. 因为可能存在某些线程,其运行过程很长,运行的中间,没有合里加sleep等休息函数,
  370. 造成占用时间较长。
  371. 于是,这里的period,可以尽量设置长一点的时间。
  372. */
  373. int watchdog_add_item(const char *name, uint32_t *id, uint32_t period)
  374. {
  375. int ret = 0;
  376. ret = ap_watchdog.add_item(name, id,period);
  377. return ret;
  378. }
  379. int watchdog_feed(uint32_t id)
  380. {
  381. int ret = 0;
  382. ret = ap_watchdog.feed(id);
  383. return ret;
  384. }
  385. int watchdog_remove_item(uint32_t id)
  386. {
  387. int ret = 0;
  388. ret = ap_watchdog.remove_item(id);
  389. return ret;
  390. }
  /* ------------------------------------------------------------------------
   * Test / usage example
   * ---------------------------------------------------------------------- */

  /*
   * Example of a watched thread: it registers itself with a 10 second period
   * and then feeds the watchdog once per second, forever.
   *
   * arg is unused. Returns NULL when registration fails; otherwise the loop
   * never ends, so the statements after it are not reached.
   */
  void *watchdog_test_pthread(void *arg)
  {
      uint32_t wdt_id;
      int rc;

      pthread_detach(pthread_self());

      rc = watchdog_add_item("watchdog_test_pthread", &wdt_id,10);
      if (0 != rc)
      {
          printf("can not add watchdog_test_pthread task to wdt\r\n");
          return NULL;
      }

      for (;;)
      {
          /* Feed first, so that every pass is guaranteed to feed.
           * Removing this call is the way to provoke a timeout record. */
          watchdog_feed(wdt_id);
          sleep(1);
      }

      watchdog_remove_item(wdt_id);
      return NULL;
  }
  411. int watchdog_test(void)
  412. {
  413. int ret;
  414. watchdog_init();
  415. pthread_t ConnectCheck_id;
  416. ret = pthread_create(&ConnectCheck_id, NULL, watchdog_test_pthread, NULL);
  417. if(ret != 0) {
  418. printf("can not create watchdog_test_pthread\r\n");
  419. return -1;
  420. }
  421. return 0;
  422. }
  423. void watchdog_feed_mainloop(void)
  424. {
  425. watchdog_feed(wdt_id_mainloop);
  426. }
  /* Intentionally does nothing. */
  void watchdog_feed_protect(void)
  {
      return;
  }
  /*
   * Print the reboot code `i` and force a system reboot via "reboot -f".
   */
  void watchdog_reset_cpu(int i)
  {
      printf("reboot code: %d .\n", i);
      /* Flush explicitly: when stdout is not a terminal it is fully buffered
       * and the message would be lost by the forced reboot. */
      fflush(stdout);
      system("reboot -f");
  }
  435. void watchdog_feed_mainloop_50s(void)
  436. {
  437. watchdog_feed(wdt_id_mainloop);
  438. }
  439. /*------------------------------- 文件结束 -------------------------------*/