1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """factory methods to build real storage objects that conform to base.py"""
23
24 import os
25
26
27
28
# Maps a compression suffix (without the leading dot) to the
# (module name, class name) pair of the file class that can read it.
# The module is only imported when such a file is actually encountered.
decompressclass = dict(
    gz=("gzip", "GzipFile"),
    bz2=("bz2", "BZ2File"),
)
33
34
classes_str = {
    "csv": ("csvl10n", "csvfile"),
    "tab": ("omegat", "OmegaTFileTab"),
    "utf8": ("omegat", "OmegaTFile"),
    "po": ("po", "pofile"),
    "pot": ("po", "pofile"),
    "mo": ("mo", "mofile"),
    "gmo": ("mo", "mofile"),
    "qm": ("qm", "qmfile"),
    "utx": ("utx", "UtxFile"),
    "_wftm": ("wordfast", "WordfastTMFile"),
    "_trados_txt_tm": ("trados", "TradosTxtTmFile"),
    "catkeys": ("catkeys", "CatkeysFile"),

    "qph": ("qph", "QphFile"),
    "tbx": ("tbx", "tbxfile"),
    "tmx": ("tmx", "tmxfile"),
    "ts": ("ts2", "tsfile"),
    "xliff": ("xliff", "xlifffile"),
    "xlf": ("xliff", "xlifffile"),
    "sdlxliff": ("xliff", "xlifffile"),
}
"""Dictionary of file extensions and the names of their associated class.

Each value is a (module, class) pair; the module name is relative to
translate.storage.  Used for dynamic lazy loading of modules.

An extension starting with an underscore (_ext) is a pseudo extension, that
is, there is no real extension by that name.
"""
60
def _examine_txt(storefile):
    """Determine the true filetype for a .txt file.

    @type storefile: file or str
    @param storefile: File object, or the name of an existing file.
    @return: The pseudo extension ('_wftm' or '_trados_txt_tm') matching
        the content found at the start of the file.
    @raise ValueError: If storefile cannot be read from, or if the content
        is not recognised.
    """
    opened_here = False
    if isinstance(storefile, basestring) and os.path.exists(storefile):
        storefile = open(storefile)
        opened_here = True
    try:
        try:
            start = storefile.read(600).strip()
        except AttributeError:
            # e.g. a string naming a file that does not exist
            raise ValueError("Need to read object to determine type")

        # The encoding is decided from the first line only: it is treated as
        # UTF-16 when it contains wordfast.TAB_UTF16, otherwise as ISO-8859-1.
        from translate.storage import wordfast
        if wordfast.TAB_UTF16 in start.split("\n")[0]:
            encoding = 'utf-16'
        else:
            encoding = 'iso-8859-1'
        # Normalise to UTF-8 so the plain-string markers below can match.
        start = start.decode(encoding).encode('utf-8')

        if '%Wordfast TM' in start:
            pseudo_extension = '_wftm'
        elif '<RTF Preamble>' in start:
            pseudo_extension = '_trados_txt_tm'
        else:
            raise ValueError("Failed to guess file type.")
        # Leave a caller-supplied file object positioned at the start again.
        storefile.seek(0)
        return pseudo_extension
    finally:
        # Only close what was opened here; a file object handed in by the
        # caller stays open for the caller to keep using.
        if opened_here:
            storefile.close()
84
# Extensions whose real format can only be found by inspecting the content.
# Each value is a function that takes the file and returns the (pseudo)
# extension to look up instead.
hiddenclasses = {
    "txt": _examine_txt,
}
86
87
def _guessextention(storefile):
    """Guess the type of a file object by looking at the first few characters.

    @param storefile: File object supporting read() and seek().
    @return: A file extension (without a leading dot).
    @raise ValueError: If none of the known markers is found.  The file
        object is rewound to the start in this case as well.
    """
    start = storefile.read(300).strip()
    # Rewind straight away so the stream is back at the beginning whether or
    # not the content is recognised.
    storefile.seek(0)

    # (marker, extension) pairs, checked in order; the first marker found wins.
    signatures = (
        ('<xliff ', 'xlf'),
        ('msgid "', 'po'),
        ('%Wordfast TM', 'txt'),
        ('<!DOCTYPE TS>', 'ts'),
        ('<tmx ', 'tmx'),
        ('#UTX', 'utx'),
    )
    for marker, extention in signatures:
        if marker in start:
            return extention
    raise ValueError("Failed to guess file type.")
108
109
def _getdummyname(storefile):
    """Make up a name for a file object that has no name attribute.

    The extension is guessed from the content of the file, so the result is
    always of the form 'dummy.<extension>'.
    """
    guessed = _guessextention(storefile)
    return 'dummy.' + guessed
113
114
def _getname(storefile):
    """Return the filename to use for storefile.

    A string is returned as is.  For any other object its name attribute is
    used, or a dummy name based on the guessed file type when it has none.

    @raise ValueError: If storefile is None.
    """
    if storefile is None:
        raise ValueError("This method cannot magically produce a filename when given None as input.")
    if isinstance(storefile, basestring):
        return storefile
    if hasattr(storefile, "name"):
        return storefile.name
    return _getdummyname(storefile)
127
128
def getclass(storefile, ignore=None, classes=None, classes_str=classes_str, hiddenclasses=hiddenclasses):
    """Factory that returns the applicable class for the type of file presented.

    @type storefile: file or str
    @param storefile: File object or file name.
    @param ignore: Some part at the back of the name to ignore (like .gz).
    @param classes: Optional dictionary of extension -> class.  When given
        (and not empty) it is used instead of classes_str.
    @param classes_str: Dictionary of extension -> (module, class) names,
        with the module imported from translate.storage on demand.
    @param hiddenclasses: Dictionary of extension -> function that inspects
        the file and returns the extension to look up instead.
    @return: The storage class (not an instance).
    @raise ValueError: If no class is known for the extension.
    """
    storefilename = _getname(storefile)
    if ignore and storefilename.endswith(ignore):
        storefilename = storefilename[:-len(ignore)]
    root, ext = os.path.splitext(storefilename)
    ext = ext[len(os.path.extsep):].lower()
    decomp = None
    if ext in decompressclass:
        # Compressed file: the real type is given by the extension before
        # the compression suffix.
        decomp = ext
        root, ext = os.path.splitext(root)
        ext = ext[len(os.path.extsep):].lower()
    if ext in hiddenclasses:
        guesserfn = hiddenclasses[ext]
        if decomp:
            _module, _class = decompressclass[decomp]
            module = __import__(_module, globals(), {}, [])
            _file = getattr(module, _class)
            # The decompressor is only needed to sniff the content; close it
            # afterwards so the underlying file handle is not leaked.
            decompressed = _file(storefile)
            try:
                ext = guesserfn(decompressed)
            finally:
                decompressed.close()
        else:
            ext = guesserfn(storefile)
    try:
        # An explicitly supplied classes dictionary takes precedence over
        # the lazily imported names in classes_str.
        if classes:
            storeclass = classes[ext]
        else:
            _module, _class = classes_str[ext]
            # A non-empty fromlist makes __import__ return the submodule
            # itself rather than the top-level package.
            module = __import__("translate.storage.%s" % _module, globals(), {}, [_module])
            storeclass = getattr(module, _class)
    except KeyError:
        raise ValueError("Unknown filetype (%s)" % storefilename)
    return storeclass
162
163
def getobject(storefile, ignore=None, classes=None, classes_str=classes_str, hiddenclasses=hiddenclasses):
    """Factory that returns a usable object for the type of file presented.

    A string naming a directory (or ending in the path separator) yields a
    Directory object.  Otherwise the store class is looked up with getclass;
    the file is parsed if it exists or is an open file object, and an empty
    store carrying the filename is returned if not.

    @type storefile: file or str
    @param storefile: File object or file name.

    Specify ignore to ignore some part at the back of the name (like .gz).
    """
    is_directory = isinstance(storefile, basestring) and (
        os.path.isdir(storefile) or storefile.endswith(os.path.sep))
    if is_directory:
        from translate.storage import directory
        return directory.Directory(storefile)

    storefilename = _getname(storefile)
    storeclass = getclass(storefile, ignore, classes=classes, classes_str=classes_str, hiddenclasses=hiddenclasses)

    # Something can be parsed if the name exists on disk, or if we were
    # handed an object that reports itself as not closed.
    parseable = os.path.exists(storefilename) or not getattr(storefile, "closed", True)
    if not parseable:
        store = storeclass()
        store.filename = storefilename
        return store

    suffix = os.path.splitext(storefilename)[1]
    suffix = suffix[len(os.path.extsep):].lower()
    if suffix in decompressclass:
        # Compressed: parse through the matching decompressing file class,
        # opened by name.
        modulename, classname = decompressclass[suffix]
        opener = getattr(__import__(modulename, globals(), {}, []), classname)
        storefile = opener(storefilename)
    return storeclass.parsefile(storefile)
192
193
# Each entry is (description, list of extensions, list of mimetypes).
supported = [
    (
        'Gettext PO file',
        ['po', 'pot'],
        ["text/x-gettext-catalog", "text/x-gettext-translation", "text/x-po", "text/x-pot"],
    ),
    (
        'XLIFF Translation File',
        ['xlf', 'xliff', 'sdlxliff'],
        ["application/x-xliff", "application/x-xliff+xml"],
    ),
    (
        'Gettext MO file',
        ['mo', 'gmo'],
        ["application/x-gettext-catalog", "application/x-mo"],
    ),
    ('Qt .qm file', ['qm'], ["application/x-qm"]),
    ('TBX Glossary', ['tbx'], ['application/x-tbx']),
    ('TMX Translation Memory', ['tmx'], ["application/x-tmx"]),
    ('Qt Linguist Translation File', ['ts'], ["application/x-linguist"]),
    ('Qt Phrase Book', ['qph'], ["application/x-qph"]),
    ('OmegaT Glossary', ['utf8', 'tab'], ["application/x-omegat-glossary"]),
    ('UTX Simple Dictionary', ['utx'], ["text/x-utx"]),
    ('Haiku catkeys file', ['catkeys'], ["application/x-catkeys"]),
]
207
def supported_files():
    """Returns data about all supported files

    The result is an independent copy: changing it, or the extension and
    mimetype lists inside it, does not affect the module-level table.

    @return: list of tuples of the form (name, extensions, mimetypes)
    @rtype: list
    """
    # A plain slice would still share the inner lists with the module-level
    # table, so callers could modify it by accident.
    import copy
    return copy.deepcopy(supported)
215