Coverage for pass_import/formats/xml.py: 97%

78 statements  

« prev     ^ index     » next       coverage.py v7.4.3, created at 2024-02-26 12:11 +0000

1# -*- encoding: utf-8 -*- 

2# pass import - Passwords importer swiss army knife 

3# Copyright (C) 2017-2024 Alexandre PUJOL <alexandre@pujol.io>. 

4# 

5 

6from xml.parsers.expat import ExpatError 

7 

8try: 

9 from defusedxml import ElementTree 

10 from defusedxml.ElementTree import ParseError 

11 from defusedxml.minidom import parse 

12except ImportError: 

13 from xml.etree import ElementTree 

14 from xml.etree.ElementTree import ParseError 

15 from xml.dom.minidom import parse 

16 

17from pass_import.core import Cap, register_detecters 

18from pass_import.detecter import Formatter 

19from pass_import.errors import FormatError 

20from pass_import.manager import PasswordImporter 

21 

22 

class XML(Formatter, PasswordImporter):
    """Common base for the XML based importers.

    :param dict xml_header: Expected header of the file; ``checkheader``
        reads its ``root`` (root tag name) and ``doctype`` keys.

    """
    cap = Cap.FORMAT | Cap.IMPORT
    format = 'xml'
    xml_header = {}
    tree = None
    dom = None

    # Import methods

    @classmethod
    def _getroot(cls, tree):
        """Return the element the import starts from (here: ``tree``)."""
        return tree

    @classmethod
    def _getvalue(cls, element):
        """Return the ``(tag, text)`` pair describing ``element``."""
        return element.tag, element.text

    def _getentry(self, elements):
        """Build an entry dict from an iterable of XML elements.

        Each element is turned into a ``(key, value)`` pair by ``_getvalue``;
        keys found in ``self.invkeys()`` are renamed accordingly, the others
        are kept unchanged.
        """
        translate = self.invkeys()
        pairs = (self._getvalue(node) for node in elements)
        return {translate.get(tag, tag): text for tag, text in pairs}

    def _import(self, element, path=''):
        """Import method for XML based importer (to be overridden)."""
        raise NotImplementedError()

    def parse(self):
        """Parse XML based file.

        :raises FormatError: if ``checkheader`` rejects the parsed document.
        """
        self.tree = ElementTree.XML(self.file.read())
        if self.checkheader(self.header()):
            self._import(self._getroot(self.tree))
            return
        raise FormatError()

    # Format recognition methods

    def is_format(self):
        """Return True if the file can be parsed as an XML document."""
        try:
            self.dom = parse(self.file)
        except (ParseError, ExpatError, UnicodeDecodeError):
            return False
        else:
            return True

    def checkheader(self, header, only=False):
        """Ensure the file header is the same as the pm header.

        When a DOM is available its doctype (if the document has one) and
        its root tag are compared with ``header``; otherwise only the root
        tag of ``self.tree`` is compared.
        """
        if not self.dom:
            # No DOM: fall back on the ElementTree root tag.
            return self.tree.tag == header.get('root', '')

        doctype = self.dom.doctype
        if doctype and doctype.toxml() != header.get('doctype', ''):
            return False
        return self.dom.documentElement.tagName == header.get('root', '')

    @classmethod
    def header(cls):
        """Header for XML file."""
        return cls.xml_header

92 

93 

class HTML(Formatter, PasswordImporter):
    """Base class for HTML based importers.

    :param str html_header: Value returned by ``header()``; ``checkheader``
        hands the header it receives to ``Element.find`` on the parsed tree.

    """
    cap = Cap.FORMAT | Cap.IMPORT
    format = 'html'
    html_header = ''
    tree = None

    # Import method

    def parse(self):
        """Parse HTML based file (to be implemented by concrete importers)."""
        raise NotImplementedError()

    # Format recognition methods

    def is_format(self):
        """Return True if the file is an HTML file.

        The file content is parsed as XML and accepted only when its root
        element is ``html``. Any parsing or decoding failure means the file
        is not in this format.
        """
        try:
            self.tree = ElementTree.XML(self.file.read())
        except (ParseError, ExpatError, UnicodeDecodeError):
            # UnicodeDecodeError: reading a binary or wrongly encoded file
            # must be reported as "not HTML" rather than crash detection,
            # consistently with XML.is_format.
            return False
        return self.tree.tag == 'html'

    def checkheader(self, header, only=False):
        """Ensure the file header is the same as the pm header.

        :param str header: Path searched with ``Element.find`` in the tree.
        :param bool only: Unused here.
        :return: True if ``header`` matches an element of the parsed tree.
        """
        return self.tree.find(header) is not None

    @classmethod
    def header(cls):
        """Header for HTML file."""
        return cls.html_header

130 

131 

# Hand both base classes to register_detecters (imported from
# pass_import.core); presumably this adds them to the set of known format
# detecters -- confirm against pass_import.core.
register_detecters(XML, HTML)