forked from akkana/scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocx2html
More file actions
executable file
·42 lines (35 loc) · 1.08 KB
/
docx2html
File metadata and controls
executable file
·42 lines (35 loc) · 1.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/bin/env python
from __future__ import print_function
import sys
import os.path
import mammoth
from bs4 import BeautifulSoup
def docx2html(infile, outfile):
    """Convert a .docx file to pretty-printed HTML.

    infile is the path of the .docx document to read.
    outfile is the path of the HTML file to write, or "--" to print
    the HTML on standard output instead.

    Any messages Mammoth reports for the conversion are printed on
    standard output. HTML written to a file is encoded as UTF-8.
    """
    # A .docx file is a zip archive: hand Mammoth the raw bytes rather
    # than a text-mode stream that would try to decode them.
    with open(infile, "rb") as fp:
        mammout = mammoth.convert_to_html(fp)

    for m in mammout.messages:
        print("Mammoth %s: %s" % (m.type, m.message))

    # Prettyprint it
    soup = BeautifulSoup(mammout.value, "lxml")
    html = soup.prettify()

    if outfile == "--":
        # Only Python 2 (where str is bytes) needs the text encoded
        # before printing; printing bytes on Python 3 would emit the
        # b'...' repr instead of the HTML itself.
        if str is bytes:
            html = html.encode("utf-8")
        print(html)
    else:
        # Write encoded bytes so the result is UTF-8 on both Python 2
        # and Python 3, independent of the locale's default encoding.
        with open(outfile, "wb") as fp:
            fp.write(html.encode("utf-8"))
            fp.write(b"\n")
if __name__ == "__main__":
    # Command line: docx2html infile.docx [outfile.html]
    # Exactly one or two arguments are accepted; -h/--help shows usage.
    if len(sys.argv) not in (2, 3) or sys.argv[1] in ("-h", "--help"):
        # Fill in the program name; the format string used to be printed
        # with a literal, unformatted "%s".
        print("Usage: %s infile.docx [outfile.html]"
              % os.path.basename(sys.argv[0]))
        sys.exit(1)

    infile = sys.argv[1]

    if len(sys.argv) == 3:
        outfile = sys.argv[2]
    else:
        # No output name given: derive it from the input name, replacing
        # a .docx extension (any case) or else just appending .html.
        base, ext = os.path.splitext(infile)
        if ext.lower() == ".docx":
            outfile = base + ".html"
        else:
            outfile = infile + ".html"

    print("mammoth", infile, outfile)
    docx2html(infile, outfile)