dasaproject/pdfexcel4DNwithxlrd.py

# Python program to
# access Excel files

# Import required library
# import xlwings as xw

import pandas as pd
# from openpyxl import load_workbook
# import openpyxl
import xlrd
import configparser
import os

config = configparser.ConfigParser()
config.read('ItemReceipt.ini')
item_inventory_path = 'ItemInventory\ItemInventory_FromQB.xlsx'
if 'Section 1' in config:
	# print('ada')
	if 'Item_inventory_path' in config['Section 1']:
		# print(config['Section 1']['Item_inventory_path'])
		item_inventory_path = config['Section 1']['item_inventory_path']
print(f'pdfexcel4DNwithxlrd->item_inventory_path:{item_inventory_path}')
# config['Section 1']['key1'] = 'myval1'	#change any key or add new one if not there

# with open('ItemReceipt.ini', 'w') as configfile:	# saving to the configfile
#    config.write(configfile)

def read_DN_excel(filename):
	# filename="TCO-DNPG-2307-00752.xls"
	# wb = openpyxl.load_workbook(filename, data_only=True)
	wb = xlrd.open_workbook(filename)

	print(wb)
	print(wb.sheets)
	# for sheet in wb.worksheets:
	#     print(sheet)

	# for sheet in wb.sheets():
	# 	print(sheet.name, sheet.nrows, sheet.ncols)
	# 	for row in range(0, sheet.nrows):
	# 		header = sheet.row_values(row, start_colx=0, end_colx=None)
	# 		print(header)


	# Opening an excel file
	# wb = xw.Book('example_001.xlsx')
	# wb = xw.Book("TCO-DNPG-2307-00752.xlsx")
	# inteminvdf = pd.read_excel('ItemInventory140723.xlsx', index_col="NameFromTaco", usecols=['FullName', 'NameFromTaco'])
	# inteminvdf = pd.read_excel('ItemInventory140723.xlsx', usecols=['FullName', 'NameFromTaco'])
	# print(os.getcwd(), os.path.join(os.getcwd(), item_inventory_path))
	inteminvdf = pd.read_excel(os.path.join(os.getcwd(), item_inventory_path), usecols=['FullName', 'NameFromTaco'])

	# print(inteminvdf)
	# print (inteminvdf.index)
	# print(inteminvdf.loc['TS-W981', 'FullName'])
	# Viewing available
	# sheets in it
	# wks = xw.sheets
	# print("Available sheets :\n", wks)
	# print(type(wks), f'count:{len(wks)}')
	# # Selecting a sheet
	# ws = wks[0]

	# Selecting a value
	# from the selected sheet
	# val = ws.range("C1").value
	# print("A value in sheet1 :", val)

	# Automatic table
	# detection from
	# a cell
	# automatic = ws.range("a10").expand().value
	# print("Automatic Table :", automatic)

	# linescount=wb.sheets[0].range('A' + str(wb.sheets[0].cells.last_cell.row)).end('up').row
	# print(linescount)

	data=[]
	firstpage=True
	boldataline=False
	# rawdata=wks[0].range("c1").current_region.value
	# print(rawdata)

	# DNRefNum = rawdata[1][3].strip().split("-")[1][-1] + "".join(rawdata[1][3].strip().split("-")[-2:])
	# TxnDate = rawdata[2][3].strip().split(" ")[-1]
	_y=1
	_x=0
	BRANCH = None
	BRANCH_LIST = ['SBY', 'BGR', 'CRB']
	BRANCH_ADDRESS = ['BUDURAN', 'CIBINONG', 'CARUBAN']
	col_value=None
	for col in range(1,wb.sheets()[0].ncols):

		col_value = wb.sheets()[0].col_values(col)
		print(f"{col=} {col_value=} {col_value[1]=}")
		if "TCO-DN" in col_value[1]:
			_x=col
			break
	if _x == 0 :
		return False, "Cannot find TCO-DN cell"
	branchindex = None
	for idx, branchadd in enumerate(BRANCH_ADDRESS):
		for cvalu in col_value:
			if branchadd in cvalu:
				branchindex = idx
				break
		if branchindex!=None:
			break
	if branchindex != None:
		BRANCH = BRANCH_LIST[branchindex]
	print(f'{branchindex = } {BRANCH = }')

	# if "TCO-DN" in wb.sheets()[0].cell(_y, _x).value.strip():


	print(wb.sheets()[0].cell(_y, _x).value.strip())
	DNRefNum = wb.sheets()[0].cell(_y, _x).value.strip().split("-")[1][-1] + "".join(wb.sheets()[0].cell(_y, _x).value.strip().split("-")[-2:])
	# print(DNRefNum)
	TxnDate = wb.sheets()[0].cell(_y+1, _x).value.strip().split(" ")[-1]
	Memo = DNRefNum
	print(f'pdfexcel4DNwithxlrd.py->DNRefNum:{DNRefNum}, TxnDate:{TxnDate} {BRANCH = }')
	DeliveryNotedict={'DNRefNum':DNRefNum, 'TxnDate':TxnDate, 'Memo': Memo, 'BRANCH':BRANCH}
	wks=wb.sheets()
	for sheet in wks:
		# rawdata=ws.range("c1").current_region.value
		# print(rawdata)
		# print()
		irow=1
		movetonextpage=False
		for idxrow in range(0, sheet.nrows):
			row = sheet.row_values(idxrow, start_colx=0, end_colx=None)
			# print(row)

		# for row in rawdata:
			# print(f'irow={irow}', row)
			irow+=1
			# print()
			if not None and row[0]=="Item No" :
				if firstpage:
					# print('firstpage')
					if len(row)==5:
						data.append(row)
					elif len(row)>5:
						temp_ =[]
						for _ in row:
							if _ != None:
								temp_.append(_)
						if len(row)==5:
							data.append(temp_)
						else:
							print("HEADER ERROR!!!")
							data.append(['Item No', 'Description', 'QuantityUOM', 'No.SO', 'LPN No.'])

					boldataline=True
					firstpage=False
					# continue
				else:
					# print('Not firstpage')
					# data.append(row)
					boldataline=True
					firstpage=False
			elif not movetonextpage:
				# print(boldataline, row)
				if boldataline:
					if row[0] is not None:
						if not None and row[0].startswith('Jenis Barang'):
							# print('ada Jenis Barang')
							pass
						elif row[0].startswith('Sub Total') and not None:
							# print('sub total found')
							boldataline=False
							movetonextpage=True
							continue
						elif row[3] is not None:
							# if row[3].startswith('Hormat'):
							if 'Hormat Kami,' in row:
								# print('hormatkami found')
								boldataline=False
								movetonextpage=True
								continue
							else:
								data.append(row)
						else:
							data.append(row)
					elif row[3] is not None:
						# if row[3].startswith('Hormat'):
						if 'Hormat Kami,' in row:
							boldataline=False
							movetonextpage=True
							# continue
						else:
							data.append(row)
					else:
						data.append(row)

	print (f'data: {data}')

	for idx, x in enumerate(data):
		for colidx, col in enumerate(x):	### change the empty cell into None
			if col == "":
				data[idx][colidx]=None
	coly=0
	lenList = 0
	xylist=[]
	for idx, x in enumerate(data):
		print(idx, x)
		xylist=[]
		if len(x)>5 and (idx % 2)==1 :
			print(idx,x)
			lenList = len(x)
			for idy, y in enumerate(x):
				if y == None:
					coly=idy

				else:
					xylist.append(y)
			if len(xylist)==5:
				data[idx]=xylist
		elif len(x)>5 and (idx % 2)==0 and coly==0:
			pass
			print("Different page, seconde line have different column width (6)")
			boloddcolumn=True
			for _ in x[:len(x)-2]:
				print(_)
				if _ != None:
					boloddcolumn=False
			print(boloddcolumn)
			if boloddcolumn:
				del x[0]
			print(x)

		elif coly != 0 and lenList == len(x):
			for idy, y in enumerate(x):
				if idy!=coly:
					xylist.append(y)
			data[idx]=xylist
			coly=0
			lenList = 0
		print(idx, data[idx])

	print(f'len data={len(data)}')

	for idx, x in enumerate(data):
		# print(idx, x)
		if x[0] !=None and x[0].upper() == "ET-06/A1.":		### Change the source from "ET-06/A1." to "ET-06/A1.BOX_100" and replace the UOM from "BOX_100" to "BOX"
			print(idx, x)
			x[0] = "ET-06/A1.BOX_100"
			x[2] = x[2].replace("BOX_100", "BOX")
			print(idx, x)

	newdata=[]

	templist=[]
	for idx, dt in enumerate(data):
		if dt[0] != 'Item No':
			print(idx,'not item', dt)
			if dt[0] is None:
				if dt[1]:
					templist[1]+=" " + str(dt[1])
				if dt[3]:
					templist.append(dt[3])
					templist[3]=templist[3].split("/")[0].strip()
					# templist[3]+=" " + dt[3]
				if dt[4]:
					templist[4]+=" " + dt[4]
				newdata.append(templist)

				templist=[]
			else:
				uom=dt[2].split(" ")[-1]
				# print(uom)
				dt.append(uom)
				dt[2]=int(dt[2].split(".")[0])
				templist=dt
				# print(templist)

	for idx, x in enumerate(newdata):
		pass
		print(idx, x)
	print(f'len newdata={len(newdata)}')


	df=pd.DataFrame(newdata, columns=['Item No', 'Description', 'Quantity', 'No.SO', 'LPN No.', 'UOM', 'Ext.Doc.No'])#, columns=data[0]+"UOM")
	# print(df)
	# df=df.groupby(['No.SO','Item No', 'UOM'])['Quantity'].sum().reset_index().sort_values(by=['Item No', 'No.SO'])#.sort_values(by=['No.SO'])
	df=df.groupby(['Ext.Doc.No', 'No.SO','Item No', 'UOM'])['Quantity'].sum().reset_index().sort_values(by=['Ext.Doc.No','No.SO', 'Item No'])
	df['NameFromTaco']=df['Item No']
	# print(df)
	# df['FullName'] = inteminvdf.loc[df['Item No'],'FullName']
	# df=df.merge(inteminvdf, how="left")
	df=df.merge(inteminvdf, left_on=df['NameFromTaco'].str.upper(), right_on=inteminvdf['NameFromTaco'].str.upper(),  how="left")

	print(df['FullName'].isnull())
	print(df['FullName'])
	# print(df)
	if df['FullName'].isnull().sum() > 0:

		print("Cannot Find Item FullName")
		listitemNoFullName = df.loc[df['FullName'].isnull()].values.tolist()
		df=df.reindex(columns=['Ext.Doc.No', 'No.SO', 'Item No', 'FullName', 'Quantity', 'UOM'])
		# print(df)
		# print(listitemNoFullName)
		# print(listitemNoFullName)
		return False, listitemNoFullName
	else:
		df=df.groupby(['Ext.Doc.No', 'No.SO','Item No', 'UOM', 'FullName'])['Quantity'].sum().reset_index().sort_values(by=['Ext.Doc.No','No.SO', 'Item No'])
		df=df.reindex(columns=['Ext.Doc.No', 'No.SO', 'Item No', 'FullName', 'Quantity', 'UOM'])
		# print(df)
		lst = df.to_dict('records')
		# print(lst)
		DeliveryNotedict['lines']=lst
		# print(f'pdfexcel4DNwithxlrd.py->DeliveryNotedict:{DeliveryNotedict}')
		return True, DeliveryNotedict

if __name__=="__main__":
	# filename = "DN_Excel_files\TCO-DNPG-2307-00048.xls"
	# read_DN_excel(filename)
	# filename = "DN_Excel_files\TCO-DNPG-2307-00079.xls"
	# read_DN_excel(filename)
	# filename = "DN_Excel_files\TCO-DNPR-2305-00305.xls"
	# read_DN_excel(filename)
	# filename = "DN_Excel_files\TCO-DNPG-2307-01407.xls"
    filename = "DN_Excel_files\TCO-DNPG-2310-00337.xls"
    status, retdict = read_DN_excel(filename)
    print(status)
    print(retdict)