Skip to content

Commit d5c8801

Browse files
authored
Merge pull request sumanth-0#814 from aniruddhaadak9/feature/simple-markdown-converter-789
Add Simple Markdown Converter - Fixes sumanth-0#789
2 parents 0b55331 + 4bb00b2 commit d5c8801

2 files changed

Lines changed: 214 additions & 0 deletions

File tree

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# Simple Markdown Converter
2+
3+
A lightweight Python utility to convert Markdown files to HTML or plain text format.
4+
5+
## Description
6+
7+
This tool converts Markdown-formatted text files into HTML or plain text, supporting common Markdown elements including headers, lists, bold/italic text, links, and code blocks.
8+
9+
## Features
10+
11+
- **Headers**: Supports H1 through H6 headers
12+
- **Lists**: Converts unordered lists (*, -)
13+
- **Text Formatting**: Bold (**text**) and italic (*text*)
14+
- **Links**: Converts [text](url) format
15+
- **Code**: Both inline `code` and code blocks (```)
16+
- **Output Formats**: HTML or plain text
17+
18+
## Usage
19+
20+
### Basic Usage
21+
22+
```bash
23+
# Convert to HTML (default)
24+
python simple_markdown_converter.py input.md
25+
26+
# Convert to plain text
27+
python simple_markdown_converter.py input.md -f text
28+
29+
# Save output to file
30+
python simple_markdown_converter.py input.md -o output.html
31+
python simple_markdown_converter.py input.md -f text -o output.txt
32+
```
33+
34+
### Command Line Arguments
35+
36+
- `input_file`: Path to the input Markdown file (required)
37+
- `-o, --output`: Output file path (optional, defaults to stdout)
38+
- `-f, --format`: Output format - 'html' or 'text' (default: html)
39+
40+
## Example
41+
42+
**Input (example.md):**
43+
```markdown
44+
# My Document
45+
46+
This is a **bold** statement and this is *italic*.
47+
48+
## Features
49+
- First item
50+
- Second item
51+
52+
Visit [GitHub](https://github.com)
53+
```
54+
55+
**Output (HTML):**
56+
```html
57+
<h1>My Document</h1>
58+
<p>This is a <strong>bold</strong> statement and this is <em>italic</em>.</p>
59+
<h2>Features</h2>
60+
<ul>
61+
<li>First item</li>
62+
<li>Second item</li>
63+
</ul>
64+
<p>Visit <a href="https://github.com">GitHub</a></p>
65+
```
66+
67+
## Requirements
68+
69+
- Python 3.6 or higher
70+
- No external dependencies (uses only standard library)
71+
72+
## Author
73+
74+
Created as part of the 100 Lines of Python Code project.
75+
76+
## Issue Reference
77+
78+
Addresses Issue #789 - Simple Markdown Converter
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Simple Markdown Converter
4+
Converts Markdown to HTML and plain text formats.
5+
Supports headers, lists, bold, italic, links, and code blocks.
6+
"""
7+
8+
import re
9+
import sys
10+
import argparse
11+
12+
13+
class MarkdownConverter:
    """A simple Markdown to HTML/plaintext converter.

    Supported syntax: ``#``-style headers (levels 1-6), unordered lists
    introduced by ``*`` or ``-``, ``**bold**``, ``*italic*``,
    ``[text](url)`` links, inline `` `code` `` and fenced code blocks
    delimited by lines starting with three backticks.

    Each conversion call starts from a clean slate; ``html_output`` and
    ``text_output`` hold the lines produced by the most recent call to
    ``convert_to_html`` / ``convert_to_plaintext`` respectively.
    """

    # Line-level patterns, compiled once instead of on every input line.
    _HEADER_RE = re.compile(r'^(#{1,6})\s+(.+)$')
    _HEADER_PREFIX_RE = re.compile(r'^#{1,6}\s+')
    _LIST_PREFIX_RE = re.compile(r'^[*-]\s+')

    # Inline patterns paired with their HTML replacement. Order matters:
    # bold must be handled before italic so '**x**' is not consumed as
    # two nested italic markers.
    _INLINE_RULES = (
        (re.compile(r'\*\*(.+?)\*\*'), r'<strong>\1</strong>'),    # Bold
        (re.compile(r'\*(.+?)\*'), r'<em>\1</em>'),                # Italic
        (re.compile(r'\[(.+?)\]\((.+?)\)'), r'<a href="\2">\1</a>'),  # Links
        (re.compile(r'`(.+?)`'), r'<code>\1</code>'),              # Inline code
    )

    _FENCE = '```'

    def __init__(self):
        self.html_output = []
        self.text_output = []

    def convert_to_html(self, markdown_text):
        """Convert markdown text to HTML.

        Args:
            markdown_text: The Markdown source as a single string; it is
                processed line by line (split on ``'\\n'``).

        Returns:
            The generated HTML, one element per line, joined with ``'\\n'``.
            Blank input lines become ``<br>``; any other line that is not a
            header, list item or code line is wrapped in ``<p>``.
        """
        # Function-scope import keeps this class self-contained; it is only
        # needed to escape the contents of fenced code blocks.
        import html

        output = []
        in_code_block = False
        in_list = False

        for line in markdown_text.split('\n'):
            # Fence lines toggle code-block mode.
            if line.strip().startswith(self._FENCE):
                if in_list:
                    # A fence ends any list that was still open.
                    output.append('</ul>')
                    in_list = False
                output.append('</code></pre>' if in_code_block else '<pre><code>')
                in_code_block = not in_code_block
                continue

            if in_code_block:
                # Code is literal text: escape it so '<', '>' and '&' are
                # displayed rather than interpreted as markup.
                output.append(html.escape(line, quote=False))
                continue

            # Unordered list items.
            list_prefix = self._LIST_PREFIX_RE.match(line)
            if list_prefix:
                if not in_list:
                    output.append('<ul>')
                    in_list = True
                item = self._process_inline(line[list_prefix.end():])
                output.append(f'<li>{item}</li>')
                continue

            # Every other kind of line terminates an open list, so close it
            # before emitting anything else (headers included).
            if in_list:
                output.append('</ul>')
                in_list = False

            # Headers.
            header_match = self._HEADER_RE.match(line)
            if header_match:
                level = len(header_match.group(1))
                content = self._process_inline(header_match.group(2))
                output.append(f'<h{level}>{content}</h{level}>')
                continue

            # Empty lines.
            if not line.strip():
                output.append('<br>')
                continue

            # Regular paragraphs.
            output.append(f'<p>{self._process_inline(line)}</p>')

        # Close whatever is still open at end of input so the result is
        # well-formed even for an unterminated fence or a trailing list.
        if in_code_block:
            output.append('</code></pre>')
        if in_list:
            output.append('</ul>')

        self.html_output = output
        return '\n'.join(output)

    def _process_inline(self, text):
        """Process inline markdown elements like bold, italic, links.

        Args:
            text: A single line (or line fragment) of Markdown.

        Returns:
            ``text`` with bold, italic, link and inline-code markers
            replaced by the corresponding HTML tags.
        """
        for pattern, replacement in self._INLINE_RULES:
            text = pattern.sub(replacement, text)
        return text

    def convert_to_plaintext(self, markdown_text):
        """Convert markdown text to plain text (strip formatting).

        Header markers are removed, list markers become a bullet
        character, and bold/italic/link/inline-code markers are dropped
        while keeping their visible text. Fence lines are removed and the
        lines between them are passed through unchanged.

        Args:
            markdown_text: The Markdown source as a single string.

        Returns:
            The plain-text rendering, lines joined with ``'\\n'``.
        """
        output = []
        in_code_block = False

        for line in markdown_text.split('\n'):
            if line.strip().startswith(self._FENCE):
                # The fence itself is formatting, not content.
                in_code_block = not in_code_block
                continue

            if in_code_block:
                # Leave code untouched: characters such as '*' are
                # literal here, not emphasis markers.
                output.append(line)
                continue

            line = self._HEADER_PREFIX_RE.sub('', line)    # Headers
            line = self._LIST_PREFIX_RE.sub('• ', line)    # Lists
            for pattern, _html_replacement in self._INLINE_RULES:
                # Keep only the visible text (group 1) of each element.
                line = pattern.sub(r'\1', line)
            output.append(line)

        self.text_output = output
        return '\n'.join(output)
106+
107+
108+
def main():
    """Command-line entry point: convert a Markdown file to HTML or text.

    Reads the file named by the positional ``input_file`` argument,
    converts it according to ``-f/--format`` (``html`` by default) and
    either writes the result to the ``-o/--output`` path or prints it to
    stdout. Exits with status 1 if the input file cannot be read.
    """
    parser = argparse.ArgumentParser(description='Convert Markdown to HTML or plain text')
    parser.add_argument('input_file', help='Input markdown file')
    parser.add_argument('-o', '--output', help='Output file (default: stdout)')
    parser.add_argument('-f', '--format', choices=['html', 'text'], default='html',
                        help='Output format (default: html)')

    args = parser.parse_args()

    # Diagnostics go to stderr: stdout is reserved for the converted
    # document so the tool can be used safely in a pipeline.
    try:
        with open(args.input_file, 'r', encoding='utf-8') as f:
            markdown_content = f.read()
    except FileNotFoundError:
        print(f'Error: File {args.input_file} not found', file=sys.stderr)
        sys.exit(1)
    except (OSError, UnicodeDecodeError) as exc:
        # Covers unreadable paths (permissions, directories) and input
        # that is not valid UTF-8, instead of dumping a traceback.
        print(f'Error: Could not read {args.input_file}: {exc}', file=sys.stderr)
        sys.exit(1)

    converter = MarkdownConverter()

    if args.format == 'html':
        result = converter.convert_to_html(markdown_content)
    else:
        result = converter.convert_to_plaintext(markdown_content)

    if args.output:
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(result)
        print(f'Output written to {args.output}')
    else:
        print(result)


if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)