Coverage for pass_import/formats/csv.py: 94%

63 statements  

« prev     ^ index     » next       coverage.py v7.4.3, created at 2024-02-26 12:11 +0000

1# -*- encoding: utf-8 -*- 

2# pass import - Passwords importer swiss army knife 

3# Copyright (C) 2017-2024 Alexandre PUJOL <alexandre@pujol.io>. 

4# 

5 

6import csv 

7from typing import List 

8 

9from pass_import.core import Cap, register_detecters 

10from pass_import.detecter import Formatter 

11from pass_import.errors import FormatError 

12from pass_import.manager import PasswordImporter 

13 

14 

class CSV(Formatter, PasswordImporter):
    """Base class for CSV based importers.

    :param list fieldnames: The list of CSV field names
    :param str csv_header: If required special csv header to look for in the
        file.

    The class also reads ``self.file``, ``self.delimiter``, ``self.only``,
    ``self.data``, ``self.keys`` and ``self.invkeys()``; none of them is
    defined here, so they are presumably provided by the base classes or by
    concrete subclasses.

    """
    cap = Cap.FORMAT | Cap.IMPORT
    format = 'csv'
    csv_header = ''
    fieldnames: List = []
    quotechar = '"'
    reader = None

    # Import method

    def parse(self):
        """Parse CSV based file.

        Build a ``csv.DictReader`` over ``self.file``, validate its header
        with :meth:`checkheader` and append one dict per row to
        ``self.data``. Column names found in the mapping returned by
        ``self.invkeys()`` are renamed accordingly, the others are kept.

        :raises FormatError: If the header check fails.
        """
        # An empty ``fieldnames`` list means "use the first line of the file
        # as the header", which DictReader does when given ``None``.
        self.reader = csv.DictReader(self.file,
                                     fieldnames=self.fieldnames or None,
                                     delimiter=self.delimiter,
                                     quotechar=self.quotechar)
        if not self.checkheader(self.header(), self.only):
            raise FormatError()

        keys = self.invkeys()
        for row in self.reader:
            self.data.append(
                {keys.get(col, col): value for col, value in row.items()}
            )

    # Format recognition methods

    def is_format(self) -> bool:
        """Return True if the file is a CSV file.

        The first 4096 characters are sniffed using ``self.delimiter``; the
        file is rejected when the sniffed quote character differs from
        ``self.quotechar``, when it looks like JSON, or when the csv module
        or the decoding fails. On success ``self.reader`` is left as a
        ``csv.DictReader`` built with the sniffed dialect.
        """
        try:
            dialect = csv.Sniffer().sniff(self.file.read(4096),
                                          delimiters=self.delimiter)
            if dialect.quotechar != self.quotechar:  # pragma: no cover
                return False
            self.file.seek(0)
            self.reader = csv.DictReader(self.file, dialect=dialect)

            # Context:
            #   1password can export data in:
            #   - 1PIF (json like format)
            #   - CSV
            # Problem:
            #   CSV sniffer considers the following line as a CSV
            #   line and will provide a dialect for it.
            #
            #   line: {"field0":"foo", ..., fieldX:"baz"}
            #
            #   This is clearly a JSON formatted line.
            # Solution:
            #   If the line looks like a JSON, then consider the
            #   file not a CSV file
            def is_json_key_value_format(value):
                """Return True if the cell contains a ``key:value`` colon."""
                return ':' in value

            def is_json_open_end_bracket(keys):
                """Return True if the cells are wrapped in ``{`` ... ``}``."""
                # Must report True on a match, otherwise the JSON guard
                # below can never reject a file.
                return (
                    bool(keys) and
                    keys[0].startswith('{') and
                    keys[-1].endswith('}')
                )

            # Read first data line when available
            for index, row in enumerate(self.reader):
                if index == 0:
                    # Skip the first row yielded by the reader.
                    # NOTE(review): DictReader already consumed the header
                    # line to build its field names, so this looks like it
                    # skips the first data row; kept as is - confirm intent.
                    continue

                # The keys of a row are the reader's field names, i.e. the
                # cells of the first line of the file. ``None`` is the key
                # DictReader uses for surplus cells and is ignored here.
                keys = [k for k in row.keys() if k is not None]
                if all(map(is_json_key_value_format, keys)):
                    if is_json_open_end_bracket(keys):
                        return False

                # Only one row needs to be inspected.
                break
        except (csv.Error, UnicodeDecodeError):
            return False
        return True

    def checkheader(self, header: List, only: bool = False) -> bool:
        """Ensure the file header is the same than the pm header.

        :param header: Column names that must all be present in the file.
        :param bool only: If True, the file must additionally have exactly
            as many columns as ``header``.
        :return: True if the header matches, False otherwise, including
            when the csv module fails while reading the field names.
        """
        try:
            if only and len(self.reader.fieldnames) != len(header):
                return False
            return all(
                csvkey in self.reader.fieldnames for csvkey in header
            )
        except csv.Error:
            return False

    @classmethod
    def header(cls):
        """Header for CSV file.

        :return: The values of the class level ``keys`` mapping.
        """
        return cls.keys.values()

122 

123 

# Hand the CSV class to register_detecters (imported from pass_import.core).
# NOTE(review): presumably this makes the class available for format
# detection - confirm against pass_import.core.
register_detecters(CSV)