parser.py 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
## @package parser
# Module caffe2.python.docs.parser
  3. import re
  4. class Parser(object):
  5. # List of tuples (regex_str, lambda(regex_match, formatter))
  6. # If a lambda returns True it will be called repeatedly with replacement
  7. # otherwise it will only be called on text that hasn't been parsed yet.
  8. regexes = [
  9. # Code blocks of various formats
  10. ('````(.+?)````',
  11. lambda m, f: f.addCode(m.group(1))
  12. ),
  13. ('```(.+?)```',
  14. lambda m, f: f.addCode(m.group(1))
  15. ),
  16. (r'((( {2})+)(\S.*)(\n\s*\n|\n))+',
  17. lambda m, f: f.addCode(m.group(0))
  18. ),
  19. (r'([^\.])\n',
  20. lambda m, f: f.addRaw('{c} '.format(c=m.group(1))) or True
  21. ),
  22. ('`(.+?)`',
  23. lambda m, f: f.addCode(m.group(1), True)
  24. ),
  25. # Make links clickable
  26. ('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'
  27. r'|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
  28. lambda m, f: f.addLink(m.group(0), m.group(0))
  29. ),
  30. (r'\*\*(.+?)\*\*',
  31. lambda m, f: f.addEmphasis(m.group(1), 2)
  32. ),
  33. (r'\*(.+?)\*',
  34. lambda m, f: f.addEmphasis(m.group(1), 1)
  35. ),
  36. ]
  37. def __init__(self, text, formatter):
  38. self.text = text
  39. self.lines = []
  40. self.formatter = formatter
  41. def parseText(self):
  42. UNPARSED = 0
  43. PARSED = 1
  44. parsed_block = [(UNPARSED, self.text)]
  45. for regex, func in self.regexes:
  46. index = 0
  47. while index < len(parsed_block):
  48. label, text = parsed_block[index]
  49. # Already been parsed
  50. if (label == PARSED):
  51. index += 1
  52. continue
  53. match = re.search(regex, text)
  54. if match:
  55. parsed_block.pop(index)
  56. start = match.start(0)
  57. end = match.end(0)
  58. f = self.formatter.clone()
  59. merge = func(match, f)
  60. if merge:
  61. merged = text[:start] + f.dump() + text[end:]
  62. parsed_block.insert(index, (UNPARSED, merged))
  63. else:
  64. if text[:start]:
  65. parsed_block.insert(index,
  66. (UNPARSED, text[:start]))
  67. index += 1
  68. parsed_block.insert(index, (PARSED, f.dump()))
  69. index += 1
  70. if text[end:]:
  71. parsed_block.insert(index,
  72. (UNPARSED, text[end:]))
  73. else:
  74. index += 1
  75. self.lines += [i for _, i in parsed_block]
  76. self.text = ' '.join(self.lines)
  77. def parse(self):
  78. self.parseText()
  79. return self.text