gns3feed - generate ATOM/RSS feed from GNS3 community website

#!/usr/bin/env python3
"""
gns3feed - generate ATOM/RSS feed from GNS3 community website

usage: gns3feed [-h] [-v] [-a] [-r] [-c CACHE] [feed_file]

positional arguments:
  feed_file                  feed file, default = ~/GNS3/feed/gns3.xml

optional arguments:
  -h, --help, -?             prints this screen
  -v, --version              prints version
  -a, --atom                 generate ATOM feed (default)
  -r, --rss                  generate RSS feed
  -c CACHE, --cache CACHE    cache file, default = ~/GNS3/feed/cache.json
"""
__version__ = "0.1"

import os
import sys
import argparse
import hashlib
import json
import re
import datetime
import dateutil.parser
import urllib3
import requests
from feedgen.feed import FeedGenerator
DEFAULT_FEED_FILE = "~/GNS3/feed/gns3.xml"
DEFAULT_CACHE_FILE = "~/GNS3/feed/cache.json"
HOST = "https://gns3.com"
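
# A Session with an urllib3 Retry adapter retransmits failed requests a
# couple of times (with a growing backoff delay) before giving up, which
# smooths over transient network hiccups when polling the site.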
def requests_session_retry(retries=2, backoff=0.3):
    """ open requests session with retry parameter """
    session = requests.Session()
    retry = urllib3.util.retry.Retry(total=retries, backoff_factor=backoff)
    adapter = requests.adapters.HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session
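
# get_data() unwraps the verbose "... (Caused by SomeError('...: reason'))"
# messages that requests produces for connection problems, so errors are
# reported as a short "SomeError: reason for host: ..." line instead.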
def get_data(session, url, params=None):
    """ get data from GNS3 website """
    headers = {'User-Agent': 'gns3feed/' + __version__,
               'X-Client-ID': '145c4252-323d-4306-bb17-9793f4dd7751'}
    try:
        resp = session.get(HOST + url, params=params, headers=headers,
                           timeout=10)
        resp.raise_for_status()
        # use .get() so a response without a Content-Type header
        # doesn't raise a KeyError
        if not resp.headers.get('Content-Type', '').startswith('application/json'):
            sys.exit("{} - Unexpected content type {}".format(
                url, resp.headers.get('Content-Type')))
        data = resp.json()
    except (requests.exceptions.RequestException, ValueError) as err:
        msg = str(err)
        match = re.search(r"\(Caused by ([a-zA-Z0-9_]+)\('[^:]*: (.*)'\)", msg)
        if match:
            msg = "{}: {} for host: {}".format(*match.groups(), HOST)
        sys.exit("{} - {}".format(url, msg))
    return data
def parse_date(date_string):
    """ convert a date string to a datetime object """
    date = dateutil.parser.parse(date_string)
    if date.tzinfo is None:
        date = date.replace(tzinfo=datetime.timezone.utc)
    return date


def date_id(date):
    """ create an id from a date """
    return date.astimezone(datetime.timezone.utc).strftime('%Y%m%d%H%M%S')
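
# Both id helpers below embed the UTC update timestamp: cache_id() also
# includes the post count, so any new post or later update date yields a
# new key and forces a refresh; feed_id() changes in step, so feed readers
# present an updated discussion as a new entry.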
def cache_id(msg):
    """ create cache id from message """
    return "-".join((msg['id'], str(msg['num_posts']),
                     date_id(parse_date(msg['updated']['date']))))


def feed_id(msg, date=None):
    """ create feed id from message """
    if not date:
        date = parse_date(msg['updated']['date'])
    return msg['id'] + "-" + date_id(date)
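
# Characters that are not allowed in XML 1.0: C0/C1 control characters
# (except tab, LF and CR), surrogates and the Unicode noncharacters.
# They have to be replaced, otherwise feed readers reject the XML.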
INVALID_CHARS = re.compile("["
                           # \u000C (form feed) added: XML 1.0 forbids it too
                           "\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F-\u009F"
                           "\uD800-\uDFFF\uFDD0-\uFDEF\uFFFE\uFFFF"
                           "\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF"
                           "\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF"
                           "\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF"
                           "\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF"
                           "\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF"
                           "\U0010FFFE\U0010FFFF]")
REPLACEMENT_CHAR = "\uFFFD"


def clean_string(string):
    """ clean a string from invalid characters """
    return INVALID_CHARS.sub(REPLACEMENT_CHAR, string)
def get_content(post):
    """ get the content of a post """
    content = ''
    if 'blocks' in post:
        for block in post['blocks']:
            if block['type'] == 'html':
                content = block['body']
                break
            elif block['type'] == 'text' and not content:
                content = block['body']
    if not content and 'body' in post:
        content = post['body']
    content = clean_string(content)
    # drop empty paragraphs and a <br> right after a closing paragraph
    content = re.sub(r'<p>\s*</p>[ \t]*', "", content)
    content = re.sub(r'(</p>\s*)<br */?>[ \t]*', r'\1', content)
    return content
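
# An article's effective update date is the newest of its creation date,
# its blocks' update dates and, if it has posts, the dates of all posts
# and their comments (fetched with one extra API call).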
def article_update_date(session, article):
    """ calculate update date of an article """
    date = parse_date(article['added']['date'])
    for block in article['blocks']:
        date = max(date, parse_date(block['updated']['date']))
    if article['num_posts'] > 0:
        param_post_id = {'__apioptions': '{"page":"' + article['id'] + '"}'}
        for post in get_data(session, "/api/v2/posts", param_post_id):
            date = max(date, parse_date(post['added']['date']))
            for comment in post['comments']:
                date = max(date, parse_date(comment['added']['date']))
    return date
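
# Articles are cached under a key that changes with every update (see
# cache_id above), so title and content of unchanged articles can be
# reused without refetching the whole thread.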
def add_articles(feed, session, cache, api_base, api_opt, link_base):
    """ add articles of an api section """
    for article in get_data(session, api_base, {'__apioptions': api_opt}):
        article_id = cache_id(article)
        if article_id in cache['old']:
            item = cache['old'][article_id]
        else:
            item = {}
            item['title'] = clean_string(article['title'].strip())
            thread = get_data(session, api_base + "/" + article['slug'])
            replies = article['num_posts']
            if 'posts' in thread:
                replies -= 1
                if article['status'] == 'closed':
                    item['title'] = "[Solved] " + item['title']
                item['content'] = get_content(thread['posts'][0])
                item['update_date'] = parse_date(article['updated']['date'])
            else:
                item['content'] = get_content(thread)
                item['update_date'] = article_update_date(session, thread)
            if replies > 0:
                item['title'] += " ({:+d})".format(replies)
            cache['updated'] = True
        cache['new'][article_id] = item
        feed_entry = feed.add_entry(order='append')
        feed_entry.id(HOST + '/feed/article/' +
                      feed_id(article, item['update_date']))
        feed_entry.title(item['title'])
        feed_entry.published(item['update_date'])
        feed_entry.updated(item['update_date'])
        feed_entry.author(name=article['added']['by']['name']['full'])
        feed_entry.dc.dc_creator(article['added']['by']['name']['full'])
        feed_entry.content(content=item['content'], type="html")
        feed_entry.link(href=HOST + link_base + "/" + article['slug'],
                        rel='alternate')
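
# generate_feed() ties everything together: load the cache, pull the two
# newest news articles and the 20 most recently updated community threads,
# then write the feed to a ".new" file and atomically os.replace() it, so
# readers never see a half-written file.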
def generate_feed(feed_file, cache_file, is_atom):
    """ generate ATOM/RSS feed """
    feed_file = os.path.expanduser(feed_file)
    cache_file = os.path.expanduser(cache_file)

    feed = FeedGenerator()
    feed.load_extension("dc", atom=False, rss=True)
    feed.title("GNS3 Discussions")
    feed.description("GNS3 Discussions")
    feed.link(href=HOST + "/community/latest", rel='alternate')
    feed.language('en')

    cache = {'old': {}, 'new': {}, 'updated': False}
    try:
        with open(cache_file, "r") as cache_fp:
            cache['old'] = json.load(cache_fp)
        for item in cache['old'].values():
            item['update_date'] = \
                datetime.datetime.fromisoformat(item['update_date'])
    except FileNotFoundError:
        pass
    except OSError as err:
        sys.exit("Can't open cache: {}".format(err))

    sess = requests_session_retry()
    add_articles(feed, sess, cache, "/api/v2/pages",
                 '{"type":"article","status":"published","limit":2,"sort":"-published.date"}',
                 "/news/article")
    add_articles(feed, sess, cache, "/api/v2/threads",
                 '{"type":"community","limit":20,"sort":"-updated.date"}',
                 "/community/discussion")

    # the feed id is derived from all entry ids, so it changes whenever
    # any entry changes
    md5_hash = hashlib.md5()
    for feed_entry in feed.entry():
        md5_hash.update(feed_entry.id().encode('utf-8'))
    feed.id(HOST + '/feed/' + md5_hash.hexdigest())

    try:
        action = "feed"
        if cache['updated'] or not os.path.exists(feed_file):
            if is_atom:
                feed.atom_file(feed_file + ".new")
            else:
                feed.rss_file(feed_file + ".new")
            os.replace(feed_file + ".new", feed_file)
        action = "cache"
        with open(cache_file, "w") as cache_fp:
            for item in cache['new'].values():
                item['update_date'] = item['update_date'].isoformat()
            json.dump(cache['new'], cache_fp)
    except OSError as err:
        sys.exit("Can't create {}: {}".format(action, err))
# Main
parser = argparse.ArgumentParser(
    add_help=False,
    description='%(prog)s - generate ATOM/RSS feed from GNS3 community website',
    formatter_class=lambda prog: argparse.HelpFormatter(prog,
                                                        max_help_position=30))
parser.add_argument('-h', '--help', '-?', action='help',
                    help='prints this screen')
parser.add_argument('-v', '--version', action='version',
                    version="%(prog)s v" + __version__,
                    help='prints version')
parser.set_defaults(atom=True)
parser.add_argument('-a', '--atom', dest='atom', action='store_true',
                    help='generate ATOM feed (default)')
parser.add_argument('-r', '--rss', dest='atom', action='store_false',
                    help='generate RSS feed')
parser.add_argument('-c', '--cache', dest='cache', default=DEFAULT_CACHE_FILE,
                    help='cache file, default = %(default)s')
parser.add_argument('feed_file', default=DEFAULT_FEED_FILE, nargs='?',
                    help='feed file, default = %(default)s')
args = parser.parse_args()

generate_feed(args.feed_file, args.cache, args.atom)
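
# Typical use (an assumption, not part of the script): run it periodically,
# e.g. from cron, and point a feed reader at the generated file:
#   */30 * * * *  /usr/local/bin/gns3feed ~/GNS3/feed/gns3.xml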