Skip to content

Commit 5883495

Browse files
Add Simple Markdown Converter for issue sumanth-0#789
Implements a Markdown to HTML/plaintext converter with support for:
- Headers (H1-H6)
- Lists (unordered)
- Bold and italic text
- Links
- Code blocks and inline code

References sumanth-0#789
1 parent b7fb013 commit 5883495

File tree

1 file changed

+136
-0
lines changed

1 file changed

+136
-0
lines changed
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Simple Markdown Converter
4+
Converts Markdown to HTML and plain text formats.
5+
Supports headers, lists, bold, italic, links, and code blocks.
6+
"""
7+
8+
import re
9+
import sys
10+
import argparse
11+
12+
13+
class MarkdownConverter:
    """A simple line-oriented Markdown to HTML/plaintext converter.

    Supported syntax: ATX headers (``#`` .. ``######``), unordered lists
    (``*`` or ``-`` bullets), bold, italic, links, inline code and fenced
    code blocks.

    Each conversion starts from a clean slate, so one instance can be reused
    for any number of documents. After a call, ``html_output`` /
    ``text_output`` hold the output lines of the most recent conversion.
    """

    # Patterns are compiled once at class-definition time instead of on
    # every input line.
    _FENCE = '```'
    _HEADER_RE = re.compile(r'^(#{1,6})\s+(.+)$')
    _HEADER_PREFIX_RE = re.compile(r'^#{1,6}\s+')
    _BULLET_RE = re.compile(r'^[*-]\s+')
    _BOLD_RE = re.compile(r'\*\*(.+?)\*\*')
    _ITALIC_RE = re.compile(r'\*(.+?)\*')
    _LINK_RE = re.compile(r'\[(.+?)\]\((.+?)\)')
    _CODE_RE = re.compile(r'`(.+?)`')
    _PLACEHOLDER_RE = re.compile(r'\x00(\d+)\x00')

    def __init__(self):
        self.html_output = []
        self.text_output = []

    @staticmethod
    def _escape_code(text: str) -> str:
        """Escape ``&``, ``<`` and ``>`` so code is shown literally in HTML."""
        # Imported locally because the module header only imports
        # re/sys/argparse.
        from html import escape

        return escape(text, quote=False)

    def _protect_code_spans(self, text, render):
        """Replace inline code spans with opaque placeholders.

        Args:
            text: One line of Markdown.
            render: Callable mapping the raw code-span content to the string
                that should finally appear in the output.

        Returns:
            A ``(text, spans)`` tuple: the text with every code span replaced
            by a ``\\x00<index>\\x00`` placeholder, and the rendered spans in
            index order. Pass both to ``_restore_code_spans`` afterwards.
        """
        spans = []

        def stash(match):
            spans.append(render(match.group(1)))
            return f'\x00{len(spans) - 1}\x00'

        # NUL delimits the placeholders, so a literal NUL in the input must
        # not survive; U+FFFD is the replacement CommonMark prescribes.
        text = text.replace('\x00', '\ufffd')
        return self._CODE_RE.sub(stash, text), spans

    def _restore_code_spans(self, text, spans):
        """Substitute the placeholders from ``_protect_code_spans`` back."""
        # A function replacement avoids backslash processing of the span text.
        return self._PLACEHOLDER_RE.sub(
            lambda match: spans[int(match.group(1))], text
        )

    def convert_to_html(self, markdown_text: str) -> str:
        """Convert Markdown text to HTML.

        Args:
            markdown_text: The Markdown document as a single string.

        Returns:
            The generated HTML, one element per line, joined with newlines.
            Blank input lines become ``<br>``; any other unrecognised line
            becomes a ``<p>`` paragraph.
        """
        out = []
        in_code_block = False
        in_list = False

        def close_list():
            nonlocal in_list
            if in_list:
                out.append('</ul>')
                in_list = False

        for line in markdown_text.split('\n'):
            # Fenced code blocks: a fence line toggles code mode.
            if line.strip().startswith(self._FENCE):
                if in_code_block:
                    out.append('</code></pre>')
                else:
                    # A <pre> must not end up nested inside an open <ul>.
                    close_list()
                    out.append('<pre><code>')
                in_code_block = not in_code_block
                continue

            if in_code_block:
                # Code is emitted verbatim apart from HTML escaping.
                out.append(self._escape_code(line))
                continue

            header_match = self._HEADER_RE.match(line)
            if header_match:
                close_list()
                level = len(header_match.group(1))
                content = self._process_inline(header_match.group(2))
                out.append(f'<h{level}>{content}</h{level}>')
                continue

            bullet_match = self._BULLET_RE.match(line)
            if bullet_match:
                if not in_list:
                    out.append('<ul>')
                    in_list = True
                content = self._process_inline(line[bullet_match.end():])
                out.append(f'<li>{content}</li>')
                continue

            # Any non-list line terminates the current list.
            close_list()

            if not line.strip():
                out.append('<br>')
                continue

            out.append(f'<p>{self._process_inline(line)}</p>')

        # Close whatever is still open at end of input.
        if in_code_block:
            out.append('</code></pre>')
        close_list()

        self.html_output = out
        return '\n'.join(out)

    def _process_inline(self, text: str) -> str:
        """Render inline Markdown (code, bold, italic, links) as HTML.

        Inline code spans are taken out first so that ``*`` or ``[`` inside
        them are not treated as emphasis or links; their content is
        HTML-escaped. Text outside code spans is not escaped.
        """
        text, spans = self._protect_code_spans(
            text, lambda code: f'<code>{self._escape_code(code)}</code>'
        )
        # Bold must run before italic: '**x**' would otherwise be consumed
        # by the single-asterisk pattern.
        text = self._BOLD_RE.sub(r'<strong>\1</strong>', text)
        text = self._ITALIC_RE.sub(r'<em>\1</em>', text)
        text = self._LINK_RE.sub(r'<a href="\2">\1</a>', text)
        return self._restore_code_spans(text, spans)

    def convert_to_plaintext(self, markdown_text: str) -> str:
        """Convert Markdown text to plain text by stripping formatting.

        Header markers are removed, list bullets become ``•``, bold/italic
        markers and backticks are dropped, and links are reduced to their
        link text. Fence lines are removed and the lines between fences are
        passed through unchanged.

        Args:
            markdown_text: The Markdown document as a single string.

        Returns:
            The plain-text rendering, lines joined with newlines.
        """
        out = []
        in_code_block = False

        for line in markdown_text.split('\n'):
            if line.strip().startswith(self._FENCE):
                in_code_block = not in_code_block
                continue

            if in_code_block:
                # Leave code untouched: '*' and '[' are not formatting here.
                out.append(line)
                continue

            # Protect inline code so its contents are not altered below.
            line, spans = self._protect_code_spans(line, lambda code: code)
            line = self._HEADER_PREFIX_RE.sub('', line)
            line = self._BULLET_RE.sub('• ', line)
            line = self._BOLD_RE.sub(r'\1', line)
            line = self._ITALIC_RE.sub(r'\1', line)
            line = self._LINK_RE.sub(r'\1', line)
            out.append(self._restore_code_spans(line, spans))

        self.text_output = out
        return '\n'.join(out)
106+
107+
108+
def main(argv=None):
    """Command-line entry point: convert a Markdown file to HTML or text.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Process exit status: 0 on success, 1 if the input could not be read
        or the output could not be written.
    """
    parser = argparse.ArgumentParser(description='Convert Markdown to HTML or plain text')
    parser.add_argument('input_file', help='Input markdown file')
    parser.add_argument('-o', '--output', help='Output file (default: stdout)')
    parser.add_argument('-f', '--format', choices=['html', 'text'], default='html',
                        help='Output format (default: html)')
    args = parser.parse_args(argv)

    # Diagnostics go to stderr so they never mix with converted output
    # when the result is piped from stdout.
    try:
        with open(args.input_file, 'r', encoding='utf-8') as f:
            markdown_content = f.read()
    except FileNotFoundError:
        print(f'Error: File {args.input_file} not found', file=sys.stderr)
        return 1
    except (OSError, UnicodeDecodeError) as exc:
        # Permission problems, directories, non-UTF-8 content, ...
        print(f'Error: Could not read {args.input_file}: {exc}', file=sys.stderr)
        return 1

    converter = MarkdownConverter()
    if args.format == 'html':
        result = converter.convert_to_html(markdown_content)
    else:
        result = converter.convert_to_plaintext(markdown_content)

    if not args.output:
        print(result)
        return 0

    try:
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(result)
    except OSError as exc:
        print(f'Error: Could not write {args.output}: {exc}', file=sys.stderr)
        return 1

    print(f'Output written to {args.output}')
    return 0


if __name__ == '__main__':
    sys.exit(main())

0 commit comments

Comments
 (0)