Commit 8b42de1a authored by dasharatha.vamshi's avatar dasharatha.vamshi

data validation

parent cbbd3b69
import pandas as pd
import numpy as np
from datetime import datetime
from pymongo import MongoClient
import uuid
# Workplace-safety Mongo instance used by this validation script.
# NOTE(review): credentials are hard-coded in the URI; move them to
# config/environment variables.
client = MongoClient('mongodb://svc-ilens:svc2345@192.168.10.10:21017')
# print(client.list_database_names())
mydatabase = client["ilens_wps"]
# Collections: user master data, a scratch/test target for the inserts at
# the bottom of this script, and the production event log used for lookups.
userCollection=mydatabase.userConfiguration
eventCollection = mydatabase.test
findCollection = mydatabase.eventLogs
# Load the FRT device export (tab-separated, first column is a stale index
# written by a previous to_csv call).
df1 = pd.read_csv('testfrt.txt', sep="\t")
df1.columns = ['S.No', "emp_id", "date", "timestamps"]
# Keep an untouched copy (with the timestamps column) for the final merge.
df1copy = df1.copy()
print(df1copy)
df1['date'] = df1['date'].astype(str)
df1['emp_id'] = df1['emp_id'].astype(str)
# Punch count per row = number of '|' separators in the timestamp string
# (comprehension replaces the original manual append loop).
sep = [ts.count('|') for ts in df1['timestamps'].tolist()]
print('-----------------------------------------', sum(sep))
df1['count'] = sep
# Drop S.No and timestamps; only emp_id / date / count are compared below.
df1.drop(df1.columns[[0, 3]], axis=1, inplace=True)
# Load the MongoDB export the same way as the FRT export above.
df2 = pd.read_csv('mongodb.txt', sep="\t")
df2.columns = ['S.No', "emp_id", "date", "timestamps"]
df2['date'] = df2['date'].astype(str)
# Punch count per row, as for df1.
sep1 = [ts.count('|') for ts in df2['timestamps'].tolist()]
df2['count'] = sep1
df2['emp_id'] = df2['emp_id'].astype(str)
# BUG FIX: the original did `df2copy = df2`, which is an alias — the
# in-place drop below also stripped the "copy". Take a real copy so
# df2copy retains the timestamps column as its name suggests.
df2copy = df2.copy()
df2.drop(df2.columns[[0, 3]], axis=1, inplace=True)
# Load the MSSQL attendance export.
df3 = pd.read_csv('msserver.txt', sep="\t")
df3.columns = ['S.No', "emp_id", "date", "timestamps"]
# BUG FIX: the original re-cast df2['date'] here (copy-paste error);
# df3's date column is the one that needs the str conversion.
df3['date'] = df3['date'].astype(str)
df3.drop(df3.columns[[0, 3]], axis=1, inplace=True)
df3['emp_id'] = df3['emp_id'].astype(str)
print(df1)
print(df2)
print(df3)
def dataframe_difference(df1, df2, which='both', output_path='diff.txt'):
    """Find rows which are different between two DataFrames.

    The frames are outer-merged on all shared columns with an indicator
    column, then filtered on that indicator.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames to compare.
    which : str or None
        Indicator value to keep ('both', 'left_only', 'right_only');
        ``None`` keeps every row that is NOT present in both frames.
    output_path : str
        File the selected rows are also written to (generalized from the
        previously hard-coded 'diff.txt'; default preserves old behavior).

    Returns
    -------
    pandas.DataFrame
        The selected rows, including the '_merge' indicator column.
    """
    comparison_df = df1.merge(df2, indicator=True, how='outer')
    if which is None:
        diff_df = comparison_df[comparison_df['_merge'] != 'both']
    else:
        diff_df = comparison_df[comparison_df['_merge'] == which]
    # Side effect kept from the original: persist the result to disk.
    diff_df.to_csv(output_path)
    return diff_df
def not_present(df1, df2, which='left_only'):
    """Select rows of df1 that have no matching row in df2.

    Outer-merges the two frames on all shared columns with an indicator
    column, keeps the rows whose indicator equals *which* (or, for
    ``which=None``, every row that is not left-only), writes them to
    'not_present.txt' and returns them.
    """
    merged = df1.merge(df2, indicator=True, how='outer')
    marker = merged['_merge']
    selected = merged[marker != 'left_only'] if which is None else merged[marker == which]
    selected.to_csv('not_present.txt')
    return selected
# key_cols = ['emp_id', 'date', 'timestamps']
# print('Matching in between mongo and msserver')
# df2copy.merge(df3.loc[:, df3.columns.isin(key_cols)])
# print(df2copy)
# print('Matching in between frt and mongo')
# df2.merge(df1.loc[:, df1.columns.isin(key_cols)])
# print(df2)
print('Matching in between frt and mongo')
# diff_df23 = dataframe_difference(df2, df3)
# print(diff_df23)
# Rows present in both the FRT export and Mongo.
diff_df12 = dataframe_difference(df1, df2)
print(diff_df12)
# print('Matching in between mongo and msserver')
# # diff_df23 = dataframe_difference(df2, df3)
# # print(diff_df23)
# diff_df23 = dataframe_difference(df2, df3)
# print(diff_df23)
print(df1copy)
print('Matching not in between frt and mongo')
# Rows seen by the FRT device but missing from Mongo — these are the
# punches that need to be back-filled into the event log.
not_present23 = not_present(df1, df2)
print(not_present23)
# Total number of missing punches (sum of the per-row '|' counts).
f = not_present23['count'].tolist()
print('++++++++++++++++',sum(f))
# Re-attach the original timestamps strings (kept in df1copy) to the
# missing rows via (emp_id, date).
df_final = pd.merge(df1copy, not_present23, on=['emp_id','date'])
print (df_final)
emps = df_final['emp_id'].tolist()
dates = df_final['date'].tolist()
timestamps = df_final['timestamps'].tolist()
# print(emps,len(emps))
# print(dates,len(dates))
# print(timestamps,len(timestamps))
# Parse each missing row's '|HH:MM|HH:MM' string into datetime objects on
# that row's date, then group all punches per employee.
mydict = {}
for i in range(len(emps)):
    # Drop the leading '|' before splitting into HH:MM tokens.
    tokens = timestamps[i][1:].split('|')
    timestamps[i] = [datetime.strptime(dates[i] + ' ' + tok, '%Y-%m-%d %H:%M')
                     for tok in tokens]
# emp_id -> flat list of datetimes across all of that employee's rows.
for emp, stamps in zip(emps, timestamps):
    if emp in mydict:
        mydict[emp] = mydict[emp] + stamps
    else:
        mydict[emp] = stamps
# Back-fill a synthetic attendance event into Mongo for every punch that
# was present in the FRT export but missing from the event log.
for key, value in mydict.items():
    # empid = findCollection.find_one({'eventMessage.Person ID':key})
    # num = empid['eventMessage']['People']
    # print(num)
    for times in value:
        data = {
            "eventId": str(uuid.uuid1()).split('-')[0],
            "rules": {
                "ruleId": [
                    'Log Attendance',
                    'BodyTemperatureSensor'
                ]
            },
            "cameraId": '6b1b229e',
            "cameraName": 'Security Reception',
            "imgeCrop": True,
            "timestamp": times,
            "eventMessage": {
                "People": '-',
                "Event": 36.4,
                'Person ID': key
            },
            "flag": {
                "People": 'log-in',
                "Event": 'Safe'
            },
            "tagId": 'RTSP',
            "frame": '',
            "deviceId": '6b1b229e',
            "message": 'Mask',
            "video_location": None,
            "eventtype": 'Temperature Exceedence',
            "temp_exceedence_check": False,
            "mask_check": True,
            "pilot_model": 'Mask+Facenet+Temp',
            "vector_distance": 0
        }
        # BUG FIX: Collection.insert() was deprecated in PyMongo 3 and
        # removed in PyMongo 4; insert_one() is the supported call.
        eventCollection.insert_one(data)
\ No newline at end of file
from pymongo import MongoClient
import collections
import pymssql
import pandas as pd
from requests import post, auth, get
# NOTE(review): `import datetime` below is shadowed by the later
# `from datetime import ... datetime` — the module name `datetime` ends up
# bound to the class, so the bare module import is effectively dead.
import datetime
import json
# NOTE(review): duplicate import — pandas was already imported above.
import pandas as pd
from datetime import date, timedelta,datetime
# try:
# collectionsAbc = collections.abc
# except AttributeError:
# collectionsAbc = collections
# Validation window (both endpoints inclusive).
time = '2020-08-01'
starttime = datetime.strptime(time, '%Y-%m-%d')
end = '2020-08-11'
endtime = datetime.strptime(end, '%Y-%m-%d')
print(starttime, endtime)
# MSSQL attendance database and Mongo event store.
# NOTE(review): credentials are hard-coded; move to config/env.
conn = pymssql.connect(server='192.10.10.24\\SQLEXPRESS', user='sa',
                       password='Shree@123', database='TimeAttendance')
client = MongoClient('mongodb://svc-ilens:svc2345@192.10.10.220:21017')
# print(client.list_database_names())
mydatabase = client["ilens_wps"]
userCollection = mydatabase.userConfiguration
eventCollection = mydatabase.eventLogs
# One datetime per calendar day in the window (comprehension replaces the
# original append loop; the days difference is already an int).
mydays = [starttime + timedelta(n) for n in range((endtime - starttime).days + 1)]
# print(mydays)
def getUserCollection():
    """Return a dict mapping emp_id -> emp_name for every employee that is
    currently working and whose face-recognition training is completed."""
    criteria = {"emp_working": True, "emp_train": "Completed"}
    return {doc['emp_id']: doc['emp_name'] for doc in userCollection.find(criteria)}
def frt():
    """Pull access-control punches from the FRT device for every trained employee.

    For each day in ``mydays`` and each employee from getUserCollection(),
    queries the device's AcsEvent endpoint and accumulates the raw ISO
    timestamps per employee.  The second phase collapses each employee's
    punches into one '|HH:MM|HH:MM' string per day (dropping punches whose
    HHMM value is less than 15 apart from the previous kept one) and writes
    the de-duplicated result to testfrt.txt.
    """
    mydict = getUserCollection()
    emp_time = {}  # emp_id -> list of raw ISO timestamp strings, all days
    for dat in mydays:
        day = str(dat)[0:10]
        print(dat, day + 'T00:00:00+00:00', day + 'T23:59:59+00:00')
        # BUG FIX: the original inner request loop reused the names
        # `key`/`value`, shadowing this loop's variables; renamed below.
        for emp_key, emp_name in mydict.items():
            empid = emp_key
            post_json = {
                "get_logs": {
                    'uri': 'http://192.10.10.221:80/ISAPI/AccessControl/AcsEvent?format=json',
                    'data': {
                        "AcsEventCond":
                        {
                            "searchID": " ",
                            "searchResultPosition": 0,
                            "major": 0,
                            "minor": 0,
                            "maxResults": 19,
                            "startTime": day + 'T00:00:00+00:00',
                            "endTime": day + 'T23:59:59+00:00',
                            "employeeNoString": str(emp_key),
                            "picEnable": False,
                            "timeReverseOrder": False
                        }
                    }
                }
            }
            output = {}
            for req_name, req in post_json.items():
                res = post(req['uri'], json=req['data'],
                           auth=auth.HTTPDigestAuth('admin', 'kl@12345'))
                output[req_name] = res.text
            try:
                json_val = json.loads(output['get_logs'])
                total_matches = json_val['AcsEvent']['totalMatches']
                timestamps = []
                if int(total_matches) != 0:
                    for info in json_val['AcsEvent']['InfoList']:
                        timestamps.append(info['time'])
                    print(timestamps)
                    if empid in emp_time:
                        emp_time[empid] = emp_time[empid] + timestamps
                    else:
                        emp_time[empid] = timestamps
            except Exception as exc:
                # Device can return non-JSON or partial payloads; skip the
                # employee/day but say so instead of the original bare
                # `except: pass`.
                print('frt: skipping response for', empid, '-', exc)
    print(emp_time)
    # Phase 2: one row per employee per day with a compact punch string.
    df_data = []
    for dat in mydays:
        day = str(dat)[0:10]
        for emp_key, stamps in emp_time.items():
            timestamps = [t for t in stamps if day == str(t)[0:10]]
            print(timestamps)
            if len(timestamps) != 0:
                time1 = []  # HH:MM strings kept after the 15-gap filter
                for stamp in timestamps:
                    hhmm = stamp.split('T')[1][0:5]
                    if len(time1) == 0:
                        time1.append(hhmm)
                    else:
                        # NOTE(review): this compares HHMM as a plain int
                        # (08:05 - 07:57 -> 48), not true minutes; kept
                        # as-is to preserve the original output — confirm.
                        gap = int(hhmm.replace(':', '')) - int(time1[-1].replace(':', ''))
                        if gap >= 15:
                            time1.append(hhmm)
                stringval = ''
                for vl in time1:
                    stringval = stringval + '|' + vl
                print(stringval)
                # Date taken from the last timestamp, as in the original.
                df_data.append([emp_key, timestamps[-1].split('T')[0], stringval])
    df = pd.DataFrame(df_data, columns=['emp_id', 'date', 'timestamps'])
    df1 = df.drop_duplicates(subset=['emp_id', 'date', 'timestamps'])
    df1.to_csv('testfrt.txt', sep='\t')
# frt()
def eventLogsCollection():
    """Classify employees by presence in the Mongo event log and export
    their per-day punch times.

    Returns
    -------
    (checkdict, presentdict) : tuple of dict
        checkdict: emp_id -> name for employees with NO events in the
        whole [starttime, endtime] window; presentdict: the rest.

    Side effect: writes per-day '|HH:MM' punch strings (punches closer
    than 15 minutes are collapsed) to mongodb.txt.
    """
    mydict = getUserCollection()
    checkdict = {}
    presentdict = {}
    timedict = {}  # emp_id -> filtered punch datetimes for the most recent day seen
    # Whole-window presence check.
    for key, value in mydict.items():
        query = {"eventMessage.Person ID": key,
                 'timestamp': {"$gte": starttime, "$lte": endtime}}
        events = eventCollection.count_documents(query)
        if events != 0:
            presentdict[key] = value
        else:
            checkdict[key] = value
    df_list = []
    for t in mydays:
        print(t, t + timedelta(days=1))
        for key, value in mydict.items():
            query = {"eventMessage.Person ID": key,
                     'timestamp': {"$gte": t, "$lt": t + timedelta(days=1)}}
            timelist = []
            for doc in eventCollection.find(query):
                if len(timelist) == 0:
                    timelist.append(doc['timestamp'])
                elif (doc['timestamp'] - timelist[-1]).seconds / 60 >= 15:
                    timelist.append(doc['timestamp'])
            # The original re-queried count_documents here; a non-empty
            # timelist answers the same question without the extra round trip.
            if timelist:
                timedict[key] = timelist
        print('timestamps\n', timedict)
        # NOTE(review): this dump sits INSIDE the day loop, so entries for
        # earlier days are re-emitted each day and the duplicates are
        # removed by drop_duplicates below — matches the original flow;
        # confirm against the source file.
        for key, values in timedict.items():
            dt = ''
            for val in values:
                dt = dt + '|' + val.strftime("%H:%M")
            df_list.append([key, values[0].strftime('%Y-%m-%d'), dt])
    df = pd.DataFrame(df_list, columns=['emp_id', 'date', 'timestamps'])
    df1 = df.drop_duplicates(subset=['emp_id', 'date', 'timestamps'])
    df1.to_csv('mongodb.txt', sep='\t')
    return checkdict, presentdict
# eventLogsCollection()
def mssql():
    """Cross-check Mongo-present employees against the MSSQL attendance
    table and export their punch times to msserver.txt.

    Side effects: prints presence dicts; writes per-day '|HH:MM' punch
    strings to msserver.txt.
    """
    check, present = eventLogsCollection()
    cursor = conn.cursor()
    time = {}  # emp_id -> non-NULL punch datetimes for the current day
    print(present)
    in_msqsql_dict = {}
    not_in_mssql_dict = {}
    # NOTE(review): SQL is string-formatted; keys come from our own user
    # collection, but parameterized queries (%s placeholders) would be safer.
    for key, value in present.items():
        cursor.execute(
            "Select count(*) from dbo.T_Tra_TblAttendance where (F_Tbl_Emp_Code='{0}') AND CAST(F_Tbl_dtpunched as DATE) >= '{1}' AND CAST(F_Tbl_dtpunched as DATE) <= '{2}'".format(key, starttime, endtime))
        count = cursor.fetchone()[0]
        print(count)
        if count != 0:
            in_msqsql_dict[key] = value
        else:
            not_in_mssql_dict[key] = value
    print('in msserver\n', in_msqsql_dict)
    print('not in msserver\n', not_in_mssql_dict)
    df_list = []
    for t in mydays:
        for key, value in present.items():
            cursor.execute(
                "Select F_Tbl_In1,F_Tbl_Out1,F_Tbl_In2,F_Tbl_Out2,F_Tbl_Int3,F_Tbl_Out3,F_Tbl_Int4,F_Tbl_Out4 from dbo.T_Tra_TblAttendance where (F_Tbl_Emp_Code='{0}') AND CAST(F_Tbl_dtpunched as DATE) = '{1}' AND CAST(F_Tbl_dtpunched as DATE) = '{2}'".format(key, t, t))
            records = cursor.fetchall()
            res1 = [list(ele) for ele in records]
            if len(res1) != 0:
                print(res1[0])
                # Keep only populated punch columns (idiomatic None test
                # replaces the original `val != None`).
                res = [val for val in res1[0] if val is not None]
                time[key] = res
        print(time)
        for key, values in time.items():
            # Guard: all eight punch columns can be NULL, leaving an empty
            # list that would crash values[0] below.
            if not values:
                continue
            dt = ''
            for val in values:
                print(type(val), val)
                # BUG FIX: the original tested `type(val) != 'NoneType'`,
                # comparing a type object to a string — always True, so it
                # never filtered anything; Nones are already removed above.
                print(val)
                dt = dt + '|' + val.strftime("%H:%M")
            df_list.append([key, values[0].strftime('%Y-%m-%d'), dt])
    df = pd.DataFrame(df_list, columns=['emp_id', 'date', 'timestamps'])
    df1 = df.drop_duplicates(subset=['emp_id', 'date', 'timestamps'])
    df1.to_csv('msserver.txt', sep='\t')
    # print(time)
mssql()
This diff is collapsed.
This diff is collapsed.
emp_id date timestamps
0 1211 2020-08-03 |07:43|15:50
1 2126 2020-08-03 |07:11|07:21|08:51
2 2180 2020-08-03 |07:07|13:57
3 2212 2020-08-03 |07:30
4 2217 2020-08-03 |07:08|13:54
5 2258 2020-08-03 |07:38|17:39
6 2308 2020-08-03 |07:50|17:27
7 2469 2020-08-03 |08:05
8 2489 2020-08-03 |07:48|16:41
9 2497 2020-08-03 |07:25|17:27
10 2525 2020-08-03 |07:10|13:41
11 2552 2020-08-03 |07:08|13:27
12 2556 2020-08-03 |07:37
13 2563 2020-08-03 |07:26
14 2627 2020-08-03 |07:31
15 2632 2020-08-03 |07:39|13:24
16 2680 2020-08-03 |08:35
17 2704 2020-08-03 |07:10|08:05
18 2706 2020-08-03 |07:48|10:42|16:41
19 2710 2020-08-03 |11:18|14:23
20 2713 2020-08-03 |07:11|08:34
21 2729 2020-08-03 |13:56
22 2727 2020-08-03 |07:54|18:26
23 2309 2020-08-03 |16:00
43 2309 2020-08-04 |07:06|15:53
64 2309 2020-08-05 |07:03|15:55
66 1995 2020-08-06 |15:31
89 1995 2020-08-09 |05:59|07:26
94 2358 2020-08-09 |07:06
95 2459 2020-08-09 |11:24
110 2690 2020-08-09 |07:27
111 2733 2020-08-09 |05:50
113 1288 2020-08-10 |07:26
114 1789 2020-08-10 |06:39|16:04
115 1802 2020-08-10 |07:13
116 1804 2020-08-10 |06:40
117 1831 2020-08-10 |07:02|16:03
118 1849 2020-08-10 |06:39
119 1857 2020-08-10 |15:59
120 1858 2020-08-10 |06:40
121 1870 2020-08-10 |06:59|16:03
122 1878 2020-08-10 |16:01
123 1907 2020-08-10 |15:58
124 1956 2020-08-10 |07:08|16:04
125 1960 2020-08-10 |07:43
126 1961 2020-08-10 |16:07
127 1972 2020-08-10 |06:56|16:02
128 1973 2020-08-10 |07:07
130 1999 2020-08-10 |06:52|16:01
131 2008 2020-08-10 |06:39
132 2040 2020-08-10 |07:22
133 2066 2020-08-10 |06:38|15:59
134 2087 2020-08-10 |05:52
135 2093 2020-08-10 |14:03
136 2094 2020-08-10 |07:22
137 2098 2020-08-10 |06:32|16:09
138 2104 2020-08-10 |15:58
139 2109 2020-08-10 |06:51|15:53
140 2114 2020-08-10 |16:10
141 2115 2020-08-10 |06:40|07:40
143 2149 2020-08-10 |07:06|15:57
144 2193 2020-08-10 |06:54|16:08
145 2197 2020-08-10 |11:09
146 2202 2020-08-10 |15:59
147 2204 2020-08-10 |16:02
149 2219 2020-08-10 |06:30|15:52
150 2235 2020-08-10 |06:37|15:52
151 2247 2020-08-10 |05:42|06:39|07:48|16:01
152 2251 2020-08-10 |06:40|15:59
153 2252 2020-08-10 |06:40
154 2253 2020-08-10 |16:00
155 2255 2020-08-10 |16:02
157 2260 2020-08-10 |05:55
158 2266 2020-08-10 |14:03
159 2277 2020-08-10 |06:39|15:59
160 2303 2020-08-10 |06:34
161 2305 2020-08-10 |06:22
163 2347 2020-08-10 |05:38|06:46|07:02|16:01
165 2410 2020-08-10 |07:11
166 2422 2020-08-10 |06:39
167 2435 2020-08-10 |07:10|15:51
169 2464 2020-08-10 |07:09|07:18
170 2467 2020-08-10 |06:41
171 2473 2020-08-10 |07:06|15:44
175 2542 2020-08-10 |15:48
177 2559 2020-08-10 |16:03
178 2569 2020-08-10 |16:04
179 2586 2020-08-10 |15:55
180 2596 2020-08-10 |07:18|16:09
181 2606 2020-08-10 |07:12|15:59
182 2610 2020-08-10 |07:08
183 2611 2020-08-10 |07:22
186 2636 2020-08-10 |15:49
187 2643 2020-08-10 |06:58
189 2683 2020-08-10 |07:08|15:52
193 2714 2020-08-10 |06:44|16:03
211 2062 2020-08-11 |07:05|16:02
,emp_id,date,count,_merge
5,2699,2020-08-01,2,left_only
6,2729,2020-08-01,1,left_only
7,2101,2020-08-02,1,left_only
8,2180,2020-08-02,1,left_only
9,2217,2020-08-02,1,left_only
10,2525,2020-08-02,2,left_only
11,2552,2020-08-02,1,left_only
12,2680,2020-08-02,1,left_only
13,2699,2020-08-02,2,left_only
14,2704,2020-08-02,1,left_only
15,2713,2020-08-02,1,left_only
16,2729,2020-08-02,3,left_only
17,67890,2020-08-02,1,left_only
18,2126,2020-08-02,3,left_only
19,2101,2020-08-03,2,left_only
21,2217,2020-08-03,2,left_only
22,2525,2020-08-03,2,left_only
23,2552,2020-08-03,2,left_only
25,2699,2020-08-03,1,left_only
27,2713,2020-08-03,1,left_only
31,2126,2020-08-03,4,left_only
43,2309,2020-08-03,2,left_only
54,2258,2020-08-04,2,left_only
62,2706,2020-08-04,3,left_only
78,2126,2020-08-05,5,left_only
83,2497,2020-08-05,2,left_only
91,U123,2020-08-05,1,left_only
112,1211,2020-08-06,2,left_only
137,2101,2020-08-07,2,left_only
162,2525,2020-08-08,2,left_only
169,2181,2020-08-08,2,left_only
182,2575,2020-08-08,2,left_only
185,2590,2020-08-08,2,left_only
197,2180,2020-08-09,2,left_only
198,2217,2020-08-09,2,left_only
199,2525,2020-08-09,3,left_only
200,2552,2020-08-09,2,left_only
202,2699,2020-08-09,2,left_only
204,2713,2020-08-09,2,left_only
207,2126,2020-08-09,5,left_only
212,2497,2020-08-09,2,left_only
217,2706,2020-08-09,2,left_only
221,U123,2020-08-09,3,left_only
223,2153,2020-08-09,2,left_only
225,2190,2020-08-09,1,left_only
228,2280,2020-08-09,2,left_only
229,2420,2020-08-09,2,left_only
237,2575,2020-08-09,1,left_only
241,2648,2020-08-09,2,left_only
242,2650,2020-08-09,2,left_only
244,2376,2020-08-09,1,left_only
246,2413,2020-08-09,1,left_only
247,2535,2020-08-09,2,left_only
248,2619,2020-08-09,1,left_only
249,2630,2020-08-09,1,left_only
250,2651,2020-08-09,1,left_only
258,2010,2020-08-09,1,left_only
268,2121,2020-08-09,1,left_only
270,2131,2020-08-09,1,left_only
273,2193,2020-08-09,1,left_only
277,2245,2020-08-09,1,left_only
282,2320,2020-08-09,2,left_only
283,2332,2020-08-09,1,left_only
284,2347,2020-08-09,2,left_only
287,2412,2020-08-09,1,left_only
289,2438,2020-08-09,1,left_only
301,2703,2020-08-09,1,left_only
314,2212,2020-08-10,2,left_only
318,2497,2020-08-10,2,left_only
320,2563,2020-08-10,1,left_only
323,2706,2020-08-10,2,left_only
327,U123,2020-08-10,2,left_only
330,2181,2020-08-10,3,left_only
331,2190,2020-08-10,2,left_only
335,2454,2020-08-10,2,left_only
337,2598,2020-08-10,1,left_only
338,2698,2020-08-10,2,left_only
341,2575,2020-08-10,2,left_only
342,1940,2020-08-10,1,left_only
343,2452,2020-08-10,2,left_only
344,2590,2020-08-10,2,left_only
349,2387,2020-08-10,1,left_only
352,2619,2020-08-10,1,left_only
357,1335,2020-08-10,1,left_only
358,1643,2020-08-10,3,left_only
368,1964,2020-08-10,1,left_only
372,2008,2020-08-10,2,left_only
380,2115,2020-08-10,3,left_only
383,2136,2020-08-10,2,left_only
387,2202,2020-08-10,2,left_only
392,2252,2020-08-10,2,left_only
398,2347,2020-08-10,5,left_only
402,2412,2020-08-10,1,left_only
407,2464,2020-08-10,2,left_only
412,2559,2020-08-10,2,left_only
413,2586,2020-08-10,2,left_only
418,2610,2020-08-10,2,left_only
421,2643,2020-08-10,2,left_only
426,1853,2020-08-10,1,left_only
427,1857,2020-08-10,2,left_only
430,1929,2020-08-10,1,left_only
432,1968,2020-08-10,1,left_only
439,2253,2020-08-10,2,left_only
442,2400,2020-08-10,1,left_only
444,2534,2020-08-10,1,left_only
448,2684,2020-08-10,1,left_only
449,2716,2020-08-10,1,left_only
451,2101,2020-08-11,2,left_only
454,2525,2020-08-11,3,left_only
460,2126,2020-08-11,5,left_only
462,2258,2020-08-11,2,left_only
467,2563,2020-08-11,3,left_only
478,2190,2020-08-11,2,left_only
481,2420,2020-08-11,2,left_only
483,1995,2020-08-11,1,left_only
494,2619,2020-08-11,2,left_only
495,2651,2020-08-11,2,left_only
498,1335,2020-08-11,2,left_only
499,1643,2020-08-11,2,left_only
500,1789,2020-08-11,2,left_only
501,1802,2020-08-11,1,left_only
502,1804,2020-08-11,2,left_only
503,1831,2020-08-11,2,left_only
504,1849,2020-08-11,2,left_only
505,1858,2020-08-11,2,left_only
506,1870,2020-08-11,2,left_only
511,1999,2020-08-11,1,left_only
512,2008,2020-08-11,3,left_only
513,2010,2020-08-11,2,left_only
514,2040,2020-08-11,1,left_only
517,2073,2020-08-11,2,left_only
519,2094,2020-08-11,2,left_only
520,2098,2020-08-11,2,left_only
522,2115,2020-08-11,2,left_only
525,2136,2020-08-11,2,left_only
526,2149,2020-08-11,1,left_only
528,2202,2020-08-11,3,left_only
531,2247,2020-08-11,2,left_only
532,2251,2020-08-11,2,left_only
533,2252,2020-08-11,2,left_only
537,2320,2020-08-11,2,left_only
538,2347,2020-08-11,3,left_only
547,2464,2020-08-11,2,left_only
548,2467,2020-08-11,2,left_only
551,2559,2020-08-11,2,left_only
553,2596,2020-08-11,2,left_only
555,2608,2020-08-11,1,left_only
557,2640,2020-08-11,2,left_only
558,2643,2020-08-11,2,left_only
560,2683,2020-08-11,2,left_only
561,2703,2020-08-11,2,left_only
562,2714,2020-08-11,2,left_only
564,1857,2020-08-11,2,left_only
571,1961,2020-08-11,3,left_only
574,2093,2020-08-11,2,left_only
576,2114,2020-08-11,2,left_only
579,2253,2020-08-11,2,left_only
581,2266,2020-08-11,2,left_only
583,2400,2020-08-11,1,left_only
584,2404,2020-08-11,1,left_only
585,2433,2020-08-11,1,left_only
588,2482,2020-08-11,1,left_only
589,2498,2020-08-11,2,left_only
592,2569,2020-08-11,2,left_only
597,2716,2020-08-11,2,left_only
599,1299,2020-08-11,1,left_only
600,1737,2020-08-11,1,left_only
603,1924,2020-08-11,1,left_only
607,2085,2020-08-11,1,left_only
609,2194,2020-08-11,1,left_only
615,2423,2020-08-11,1,left_only
618,2495,2020-08-11,1,left_only
619,2560,2020-08-11,1,left_only
620,2572,2020-08-11,2,left_only
627,2664,2020-08-11,1,left_only
628,2723,2020-08-11,1,left_only
import pandas as pd
import numpy as np
# De-duplicate the raw FRT export in place: read frt.txt, drop the stale
# index column, remove duplicate (emp_id, date, timestamps) rows and write
# the result back over the same file.
df1 = pd.read_csv('frt.txt', sep="\t")
df1.columns = ['S.No',"emp_id", "date", "timestamps"]
# df1['date']=df1['date'].astype(str)
# # df1['timestamps']=df1['timestamps'].astype(str)
# df1['emp_id']=df1['emp_id'].astype(str)
# Column 0 is the index written by the previous to_csv call.
df1.drop(df1.columns[[0]], axis = 1,inplace = True)
print('----',df1)
df2 = df1.drop_duplicates(subset=['emp_id', 'date', 'timestamps'])
print('---',df2)
# NOTE(review): this overwrites the input file; a crash mid-write loses it.
df2.to_csv('frt.txt', sep='\t')
# df1.drop(df1.columns[[0,3]], axis = 1,inplace = True)
\ No newline at end of file
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment