Coverage for pass_import/formats/xml.py: 97%
78 statements
« prev ^ index » next coverage.py v7.4.3, created at 2024-02-26 12:11 +0000
« prev ^ index » next coverage.py v7.4.3, created at 2024-02-26 12:11 +0000
1# -*- encoding: utf-8 -*-
2# pass import - Passwords importer swiss army knife
3# Copyright (C) 2017-2024 Alexandre PUJOL <alexandre@pujol.io>.
4#
6from xml.parsers.expat import ExpatError
8try:
9 from defusedxml import ElementTree
10 from defusedxml.ElementTree import ParseError
11 from defusedxml.minidom import parse
12except ImportError:
13 from xml.etree import ElementTree
14 from xml.etree.ElementTree import ParseError
15 from xml.dom.minidom import parse
17from pass_import.core import Cap, register_detecters
18from pass_import.detecter import Formatter
19from pass_import.errors import FormatError
20from pass_import.manager import PasswordImporter
class XML(Formatter, PasswordImporter):
    """Common foundation for importers whose input is an XML document.

    :param dict xml_header: Expected ``root`` tag name and, optionally,
        the expected ``doctype`` declaration of the document.

    """
    cap = Cap.FORMAT | Cap.IMPORT
    format = 'xml'
    xml_header = {}
    tree = None
    dom = None

    # Import methods

    @classmethod
    def _getroot(cls, tree):
        """Return the element the import starts from: the tree itself."""
        return tree

    @classmethod
    def _getvalue(cls, element):
        """Return the ``(tag, text)`` pair describing ``element``."""
        return element.tag, element.text

    def _getentry(self, elements):
        """Build an entry dict from an iterable of XML elements.

        Each element is turned into a key/value pair with ``_getvalue``.
        The key is translated through ``self.invkeys()`` when a mapping
        exists for it and kept as is otherwise. A later element with the
        same key overwrites an earlier one.
        """
        mapping = self.invkeys()
        pairs = (self._getvalue(node) for node in elements)
        return {mapping.get(name, name): text for name, text in pairs}

    def _import(self, element, path=''):
        """Import the content of ``element``; to be provided by subclasses."""
        raise NotImplementedError()

    def parse(self):
        """Read the file, validate its header and import its content.

        :raises FormatError: if ``checkheader`` rejects the document.
        """
        content = self.file.read()
        self.tree = ElementTree.XML(content)
        header_ok = self.checkheader(self.header())
        if not header_ok:
            raise FormatError()
        self._import(self._getroot(self.tree))

    # Format recognition methods

    def is_format(self):
        """Tell whether the file can be parsed as an XML document.

        On success the parsed DOM is kept in ``self.dom``.
        """
        try:
            # ``parse`` is the module level minidom parser, not the method.
            self.dom = parse(self.file)
        except (ParseError, ExpatError, UnicodeDecodeError):
            recognised = False
        else:
            recognised = True
        return recognised

    def checkheader(self, header, only=False):
        """Check that the document matches the expected ``header``.

        With a DOM available, a doctype that is present must equal
        ``header['doctype']`` and the document element must be named
        ``header['root']``. Without a DOM only the root tag of
        ``self.tree`` is compared. Missing header keys count as ``''``.
        """
        expected_root = header.get('root', '')
        if not self.dom:
            return self.tree.tag == expected_root

        doctype = self.dom.doctype
        if doctype and doctype.toxml() != header.get('doctype', ''):
            return False
        return self.dom.documentElement.tagName == expected_root

    @classmethod
    def header(cls):
        """Return the XML header description declared on the class."""
        return cls.xml_header
class HTML(Formatter, PasswordImporter):
    """Base class for HTML based importers.

    :param str html_header: Path expression that must match an element
        of the parsed document for the header check to succeed.

    """
    cap = Cap.FORMAT | Cap.IMPORT
    format = 'html'
    html_header = ''
    tree = None

    # Import method

    def parse(self):
        """Parse HTML based file; to be provided by subclasses."""
        raise NotImplementedError()

    # Format recognition methods

    def is_format(self):
        """Return True if the file is an HTML file.

        The file is read and parsed as XML, and the parsed root is kept
        in ``self.tree``. The file is recognised only when parsing
        succeeds and the root tag is ``html``.
        """
        try:
            self.tree = ElementTree.XML(self.file.read())
        except (ParseError, ExpatError, UnicodeDecodeError):
            # UnicodeDecodeError: reading a file that cannot be decoded
            # must mean "not this format", as in XML.is_format, rather
            # than abort the detection.
            return False
        return self.tree.tag == 'html'

    def checkheader(self, header, only=False):
        """Ensure the file header is the same as the pm header.

        :param str header: Path expression looked up in ``self.tree``.
        :return: True when an element matching ``header`` is found.
        """
        return self.tree.find(header) is not None

    @classmethod
    def header(cls):
        """Header for HTML file."""
        return cls.html_header
# Pass both base classes to register_detecters (imported from
# pass_import.core). NOTE(review): presumably this makes them available
# for format detection -- confirm against pass_import.core.
132register_detecters(XML, HTML)