forked from shibing624/python-tutorial
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtail_tag.py
More file actions
33 lines (29 loc) · 980 Bytes
/
tail_tag.py
File metadata and controls
33 lines (29 loc) · 980 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# -*- coding: utf-8 -*-
# Author: XuMing <[email protected]>
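# Date: 17/9/18
# Brief: Append each user's tag to the demo.txt records whose variant word is
#        not a common ad word, and write the tagged records to result.txt.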
import sys
# Build a key -> value lookup from taguser.txt. The keys are later matched
# against the user-id column of demo.txt. Only rows whose third tab-separated
# field (presumably a confidence score -- confirm against the data producer)
# is at least 0.95 are kept.
tag_map = {}
with open("taguser.txt", encoding="utf-8") as f:
    for line in f:
        parts = line.strip().split("\t")
        # str.split never returns an empty list, so a truthiness test on
        # `parts` cannot protect parts[2]. Check the field count explicitly
        # so blank or truncated lines are skipped instead of raising
        # IndexError. A non-numeric third field still raises ValueError.
        if len(parts) >= 3 and float(parts[2]) >= 0.95:
            key, val = parts[0], parts[1]
            # Ignore rows where either the key or the value is empty.
            if key and val:
                tag_map[key] = val
# Collect the first tab-separated field of every non-empty line of
# ad_words.txt into a set for O(1) membership tests.
common_ad_words = set()
with open("ad_words.txt", encoding="utf-8") as f:
    for line in f:
        # Split on tabs and keep the whole first field. Stripping tabs and
        # indexing [0] would store only the first character of the line and
        # raise IndexError on a blank line.
        word = line.strip().split("\t")[0]
        if word:
            common_ad_words.add(word)
# Select the demo.txt records to emit. A record is kept when its
# second-to-last field (the variant word) is not in common_ad_words and its
# fifth field (the user id) has an entry in tag_map. Kept records get
# "\ttag:\t<tag>" appended; the set removes exact duplicates.
text_set = set()
with open("demo.txt", encoding="utf-8") as f:
    for line in f:
        record = line.strip()
        parts = record.split("\t")
        # parts[4] needs at least five fields; skip blank or truncated lines
        # rather than aborting the whole run with IndexError.
        if len(parts) < 5:
            continue
        userid = parts[4]
        variant_word = parts[-2]
        if variant_word not in common_ad_words and userid in tag_map:
            text_set.add("\t".join([record, "tag:", tag_map[userid]]))
# Dump every collected record to result.txt, one per line, in the set's
# iteration order.
with open("result.txt", mode="w", encoding="utf-8") as out_file:
    out_file.writelines(record + "\n" for record in text_set)