Skip to content

Commit 45c2ce0

Browse files
committed
Merge branch 'webproject' of github.com:animysore/cheqify-python into webproject
2 parents f1376ad + 1606dc8 commit 45c2ce0

21 files changed

Lines changed: 570 additions & 18 deletions

.DS_Store

6 KB
Binary file not shown.

ocr/scene.py

Lines changed: 147 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,147 @@
1+
import pytesseract
2+
import cv2
3+
import sys
4+
import math
5+
import numpy as np
6+
from datetime import datetime,timedelta
7+
import os
8+
from PIL import Image
9+
from os import listdir
10+
from os.path import isfile, join
11+
import re
12+
import pandas as pd
13+
14+
15+
# Directory prefix prepended to the temporary ticker image paths
# ('tickimg.jpg', 'backup.jpg') that ocr_ticker reads and deletes.
base_dir ="/mnt/"
16+
#base_dir ="" #Uncomment to run locally
17+
18+
def _negative_upscaled(image):
    """Return the bitwise-inverted copy of *image*, enlarged 2x (bicubic)."""
    inverted = cv2.bitwise_not(image)
    return cv2.resize(inverted, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)


def _binarise(image, threshold=True, invert=False):
    """Optionally apply the fixed 127 binary threshold, then a binary inversion."""
    if threshold:
        image = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)[1]
    if invert:
        # THRESH_BINARY_INV with threshold 0 maps every non-zero pixel to 0
        # and every zero pixel to 255.
        image = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV)[1]
    return image


def _recognise(image, config):
    """Run Tesseract on *image* and return ``(text, confidence)``.

    ``confidence`` is a pandas Series holding the mean ``conf`` value per
    ``block_num``; rows whose ``conf`` is -1 are dropped before averaging.
    """
    # NOTE(review): image_to_data is called without `config`, so the
    # confidences are computed with pytesseract's default settings rather
    # than the requested language -- confirm this is intended.
    data = pytesseract.image_to_data(image, output_type='data.frame')
    data = data[data.conf != -1]
    confidence = data.groupby(['block_num'])['conf'].mean()
    text = pytesseract.image_to_string(image, config=config)
    return text, confidence


def _pick_result(first, second):
    """Choose between two ``(text, confidence)`` OCR results.

    Rules, in order:
      * both confidence tables empty: prefer ``first`` unless only
        ``second`` has text;
      * exactly one table empty: prefer the other result if it has text;
      * otherwise compare the mean confidence of block 1 and take the
        higher one, keeping ``first`` on a tie.

    A result with no entry for block 1 loses to one that has it (this used
    to raise ``KeyError``); if neither has one, ``first`` is returned.
    """
    text_a, conf_a = first
    text_b, conf_b = second

    if conf_a.empty and conf_b.empty:
        if text_a == '' and text_b != '':
            return second
        return first
    if conf_b.empty and text_a != '':
        return first
    if conf_a.empty and text_b != '':
        return second

    # Series.get is label-based here and returns None for a missing block.
    score_a = conf_a.get(1)
    score_b = conf_b.get(1)
    if score_a is None or score_b is None:
        if score_a is None and score_b is not None:
            return second
        return first
    return second if score_b > score_a else first


def ocr(file, lang, option, d):
    """OCR an image with Tesseract and return ``(text, confidence)``.

    Args:
        file: Path of the image when ``option == 1``; otherwise the image
            itself, passed straight to OpenCV.
        lang: Tesseract language code, inserted into the ``-l`` option.
        option: ``1`` to read ``file`` from disk with ``cv2.imread``; any
            other value uses ``file`` as the image.
        d: ``1`` for a single pass without denoising (ticker images); any
            other value denoises the image, runs two differently
            thresholded passes and returns the better one.

    Returns:
        A tuple of the recognised text and a pandas Series with the mean
        confidence per Tesseract block.
    """
    # '--oem 1' selects the LSTM OCR engine.
    config = '-l ' + lang + ' --oem 1 --psm 3'
    im = cv2.imread(file, cv2.IMREAD_COLOR) if option == 1 else file

    if d == 1:
        # Without denoising (ticker only).
        image = _binarise(_negative_upscaled(im), invert=True)
        return _recognise(image, config)

    # Both denoised variants start from the same denoised, inverted and
    # upscaled image, so that costly preparation is done only once.
    denoised = cv2.fastNlMeansDenoisingColored(im, None, 20, 10, 7, 21)
    denoised = cv2.fastNlMeansDenoising(denoised, None, 10, 7, 21)
    prepared = _negative_upscaled(denoised)

    # With denoising (inversion): binary threshold only.
    inverted_result = _recognise(_binarise(prepared), config)

    # With denoising (without inversion): threshold, then invert back.
    # NOTE(review): the source's indentation was lost; this assumes only the
    # 127 threshold is skipped for 'ben' while the inversion always runs --
    # confirm against the original file.
    plain_image = _binarise(prepared, threshold=(lang != 'ben'), invert=True)
    plain_result = _recognise(plain_image, config)

    return _pick_result(inverted_result, plain_result)
90+
91+
92+
# Write to output file
93+
94+
_LATIN_OR_DIGIT = re.compile(r'[A-Za-z0-9]')


def _compact_timestamp(moment):
    """Format *moment* as ``YYYYMMDDHHMMSS.mmm``, rounded to the nearest ms."""
    # Adding half a millisecond before truncating rounds to nearest and lets
    # datetime arithmetic carry into the seconds (and beyond) correctly.
    moment = moment + timedelta(microseconds=500)
    return moment.strftime('%Y%m%d%H%M%S') + '.%03d' % (moment.microsecond // 1000)


def _trim_partial_words(words):
    """Drop a short first and/or last word from a list of more than two words.

    Words containing a Latin letter or digit are dropped when shorter than
    6 characters (first word) or at most 4 characters (last word); other
    words are dropped when at most 8 characters long.
    """
    if len(words) <= 2:
        return words

    first_limit = 5 if _LATIN_OR_DIGIT.search(words[0]) else 8
    if len(words[0]) <= first_limit:
        words = words[1:]

    last_limit = 4 if _LATIN_OR_DIGIT.search(words[-1]) else 8
    if len(words[-1]) <= last_limit:
        words = words[:-1]
    return words


def writefile(op, boxes, no, ms, base, text, lang):
    """Write one ``TIC2`` ticker record to *op*.

    The record has the form
    ``start|end|TIC2|<no, 6 digits>|<b0> <b2> <|b1-b0|> <|b3-b2|>|<text>``
    followed by a newline, where ``b0..b3`` are ``boxes[0..3]`` and the
    timestamps are formatted as ``YYYYMMDDHHMMSS.mmm``.

    Args:
        op: Writable text stream.
        boxes: Indexable with four numbers.
            NOTE(review): presumably (x0, x1, y0, y1) -- confirm with caller.
        no: Integer written zero-padded to six digits.
        ms: Offset in milliseconds added to *base* to get the start time.
        base: ``datetime`` the offset is relative to.
        text: OCR text; whitespace is collapsed to single spaces and short
            leading/trailing words are trimmed for ticker continuity.
        lang: Unused; kept for interface compatibility.
    """
    start = base + timedelta(milliseconds=ms)
    end = start + timedelta(milliseconds=2200)

    # Timestamps are formatted from the datetime fields directly: going
    # through str() and a float drops the fraction when microseconds are 0
    # and cannot represent milliseconds exactly at this magnitude.
    start_stamp = _compact_timestamp(start)
    end_stamp = _compact_timestamp(end)

    # Modify ticker text for continuity.
    words = _trim_partial_words(text.split())
    line_text = ' '.join(words)

    geometry = ' '.join([
        "%03d" % int(boxes[0]),
        "%03d" % int(boxes[2]),
        "%03d" % abs(boxes[1] - boxes[0]),
        "%03d" % abs(boxes[3] - boxes[2]),
    ])
    record = '|'.join([start_stamp, end_stamp, 'TIC2', "%06d" % no, geometry, line_text])
    op.write(record + '\n')
120+
121+
122+
## fetch_output(file,boxes,frame_no,timestamp,start_time_utc,lang)
123+
124+
def _ocr_and_write(path, op, boxes, no, ts, base, lang):
    """OCR the image at *path* and write its record; return True if written."""
    text, _ = ocr(path, lang, 1, 1)
    if not text.strip():
        # Blank or whitespace-only OCR output is treated as a failure.
        return False
    writefile(op, boxes, no, ts, base, text, lang)
    return True


def ocr_ticker(op, boxes, no, ts, base, lang):
    """OCR the current ticker image and append its record to *op*.

    ``tickimg.jpg`` under ``base_dir`` is tried first; if it yields blank
    text or raises, ``backup.jpg`` is tried. At most one record is written.
    Both temporary images are removed afterwards, whether or not a record
    was written. Errors are swallowed (best effort) and logged at debug
    level; the function always returns ``None``.

    Args:
        op: Writable text stream passed on to ``writefile``.
        boxes, no, ts, base, lang: Forwarded unchanged to ``writefile``
            (``ts`` as its millisecond offset); ``lang`` is also passed
            to ``ocr``.
    """
    import logging

    log = logging.getLogger(__name__)
    candidates = (base_dir + 'tickimg.jpg', base_dir + 'backup.jpg')

    for path in candidates:
        try:
            if _ocr_and_write(path, op, boxes, no, ts, base, lang):
                break
        except Exception:
            # Best effort: fall through to the backup image, or give up.
            log.debug("ticker OCR failed for frame %s (%s)", no, path, exc_info=True)

    # Cleanup is kept separate from the OCR attempts so that a failed
    # removal can never trigger a second OCR pass and a duplicate record.
    for path in candidates:
        try:
            os.remove(path)
        except OSError:
            pass

0 commit comments

Comments
 (0)