dasaproject/pdfexcel4DNwithxlrd.py
2025-05-19 14:01:22 +07:00

327 lines
9.5 KiB
Python

# Python program to
# access Excel files
# Import required library
# import xlwings as xw
import pandas as pd
# from openpyxl import load_workbook
# import openpyxl
import xlrd
import configparser
import os
config = configparser.ConfigParser()
config.read('ItemReceipt.ini')
item_inventory_path = 'ItemInventory\ItemInventory_FromQB.xlsx'
if 'Section 1' in config:
# print('ada')
if 'Item_inventory_path' in config['Section 1']:
# print(config['Section 1']['Item_inventory_path'])
item_inventory_path = config['Section 1']['item_inventory_path']
print(f'pdfexcel4DNwithxlrd->item_inventory_path:{item_inventory_path}')
# config['Section 1']['key1'] = 'myval1' #change any key or add new one if not there
# with open('ItemReceipt.ini', 'w') as configfile: # saving to the configfile
# config.write(configfile)
def read_DN_excel(filename):
# filename="TCO-DNPG-2307-00752.xls"
# wb = openpyxl.load_workbook(filename, data_only=True)
wb = xlrd.open_workbook(filename)
print(wb)
print(wb.sheets)
# for sheet in wb.worksheets:
# print(sheet)
# for sheet in wb.sheets():
# print(sheet.name, sheet.nrows, sheet.ncols)
# for row in range(0, sheet.nrows):
# header = sheet.row_values(row, start_colx=0, end_colx=None)
# print(header)
# Opening an excel file
# wb = xw.Book('example_001.xlsx')
# wb = xw.Book("TCO-DNPG-2307-00752.xlsx")
# inteminvdf = pd.read_excel('ItemInventory140723.xlsx', index_col="NameFromTaco", usecols=['FullName', 'NameFromTaco'])
# inteminvdf = pd.read_excel('ItemInventory140723.xlsx', usecols=['FullName', 'NameFromTaco'])
# print(os.getcwd(), os.path.join(os.getcwd(), item_inventory_path))
inteminvdf = pd.read_excel(os.path.join(os.getcwd(), item_inventory_path), usecols=['FullName', 'NameFromTaco'])
# print(inteminvdf)
# print (inteminvdf.index)
# print(inteminvdf.loc['TS-W981', 'FullName'])
# Viewing available
# sheets in it
# wks = xw.sheets
# print("Available sheets :\n", wks)
# print(type(wks), f'count:{len(wks)}')
# # Selecting a sheet
# ws = wks[0]
# Selecting a value
# from the selected sheet
# val = ws.range("C1").value
# print("A value in sheet1 :", val)
# Automatic table
# detection from
# a cell
# automatic = ws.range("a10").expand().value
# print("Automatic Table :", automatic)
# linescount=wb.sheets[0].range('A' + str(wb.sheets[0].cells.last_cell.row)).end('up').row
# print(linescount)
data=[]
firstpage=True
boldataline=False
# rawdata=wks[0].range("c1").current_region.value
# print(rawdata)
# DNRefNum = rawdata[1][3].strip().split("-")[1][-1] + "".join(rawdata[1][3].strip().split("-")[-2:])
# TxnDate = rawdata[2][3].strip().split(" ")[-1]
_y=1
_x=0
BRANCH = None
BRANCH_LIST = ['SBY', 'BGR', 'CRB']
BRANCH_ADDRESS = ['BUDURAN', 'CIBINONG', 'CARUBAN']
col_value=None
for col in range(1,wb.sheets()[0].ncols):
col_value = wb.sheets()[0].col_values(col)
print(f"{col=} {col_value=} {col_value[1]=}")
if "TCO-DN" in col_value[1]:
_x=col
break
if _x == 0 :
return False, "Cannot find TCO-DN cell"
branchindex = None
for idx, branchadd in enumerate(BRANCH_ADDRESS):
for cvalu in col_value:
if branchadd in cvalu:
branchindex = idx
break
if branchindex!=None:
break
if branchindex != None:
BRANCH = BRANCH_LIST[branchindex]
print(f'{branchindex = } {BRANCH = }')
# if "TCO-DN" in wb.sheets()[0].cell(_y, _x).value.strip():
print(wb.sheets()[0].cell(_y, _x).value.strip())
DNRefNum = wb.sheets()[0].cell(_y, _x).value.strip().split("-")[1][-1] + "".join(wb.sheets()[0].cell(_y, _x).value.strip().split("-")[-2:])
# print(DNRefNum)
TxnDate = wb.sheets()[0].cell(_y+1, _x).value.strip().split(" ")[-1]
Memo = DNRefNum
print(f'pdfexcel4DNwithxlrd.py->DNRefNum:{DNRefNum}, TxnDate:{TxnDate} {BRANCH = }')
DeliveryNotedict={'DNRefNum':DNRefNum, 'TxnDate':TxnDate, 'Memo': Memo, 'BRANCH':BRANCH}
wks=wb.sheets()
for sheet in wks:
# rawdata=ws.range("c1").current_region.value
# print(rawdata)
# print()
irow=1
movetonextpage=False
for idxrow in range(0, sheet.nrows):
row = sheet.row_values(idxrow, start_colx=0, end_colx=None)
# print(row)
# for row in rawdata:
# print(f'irow={irow}', row)
irow+=1
# print()
if not None and row[0]=="Item No" :
if firstpage:
# print('firstpage')
if len(row)==5:
data.append(row)
elif len(row)>5:
temp_ =[]
for _ in row:
if _ != None:
temp_.append(_)
if len(row)==5:
data.append(temp_)
else:
print("HEADER ERROR!!!")
data.append(['Item No', 'Description', 'QuantityUOM', 'No.SO', 'LPN No.'])
boldataline=True
firstpage=False
# continue
else:
# print('Not firstpage')
# data.append(row)
boldataline=True
firstpage=False
elif not movetonextpage:
# print(boldataline, row)
if boldataline:
if row[0] is not None:
if not None and row[0].startswith('Jenis Barang'):
# print('ada Jenis Barang')
pass
elif row[0].startswith('Sub Total') and not None:
# print('sub total found')
boldataline=False
movetonextpage=True
continue
elif row[3] is not None:
# if row[3].startswith('Hormat'):
if 'Hormat Kami,' in row:
# print('hormatkami found')
boldataline=False
movetonextpage=True
continue
else:
data.append(row)
else:
data.append(row)
elif row[3] is not None:
# if row[3].startswith('Hormat'):
if 'Hormat Kami,' in row:
boldataline=False
movetonextpage=True
# continue
else:
data.append(row)
else:
data.append(row)
print (f'data: {data}')
for idx, x in enumerate(data):
for colidx, col in enumerate(x): ### change the empty cell into None
if col == "":
data[idx][colidx]=None
coly=0
lenList = 0
xylist=[]
for idx, x in enumerate(data):
print(idx, x)
xylist=[]
if len(x)>5 and (idx % 2)==1 :
print(idx,x)
lenList = len(x)
for idy, y in enumerate(x):
if y == None:
coly=idy
else:
xylist.append(y)
if len(xylist)==5:
data[idx]=xylist
elif len(x)>5 and (idx % 2)==0 and coly==0:
pass
print("Different page, seconde line have different column width (6)")
boloddcolumn=True
for _ in x[:len(x)-2]:
print(_)
if _ != None:
boloddcolumn=False
print(boloddcolumn)
if boloddcolumn:
del x[0]
print(x)
elif coly != 0 and lenList == len(x):
for idy, y in enumerate(x):
if idy!=coly:
xylist.append(y)
data[idx]=xylist
coly=0
lenList = 0
print(idx, data[idx])
print(f'len data={len(data)}')
for idx, x in enumerate(data):
# print(idx, x)
if x[0] !=None and x[0].upper() == "ET-06/A1.": ### Change the source from "ET-06/A1." to "ET-06/A1.BOX_100" and replace the UOM from "BOX_100" to "BOX"
print(idx, x)
x[0] = "ET-06/A1.BOX_100"
x[2] = x[2].replace("BOX_100", "BOX")
print(idx, x)
newdata=[]
templist=[]
for idx, dt in enumerate(data):
if dt[0] != 'Item No':
print(idx,'not item', dt)
if dt[0] is None:
if dt[1]:
templist[1]+=" " + str(dt[1])
if dt[3]:
templist.append(dt[3])
templist[3]=templist[3].split("/")[0].strip()
# templist[3]+=" " + dt[3]
if dt[4]:
templist[4]+=" " + dt[4]
newdata.append(templist)
templist=[]
else:
uom=dt[2].split(" ")[-1]
# print(uom)
dt.append(uom)
dt[2]=int(dt[2].split(".")[0])
templist=dt
# print(templist)
for idx, x in enumerate(newdata):
pass
print(idx, x)
print(f'len newdata={len(newdata)}')
df=pd.DataFrame(newdata, columns=['Item No', 'Description', 'Quantity', 'No.SO', 'LPN No.', 'UOM', 'Ext.Doc.No'])#, columns=data[0]+"UOM")
# print(df)
# df=df.groupby(['No.SO','Item No', 'UOM'])['Quantity'].sum().reset_index().sort_values(by=['Item No', 'No.SO'])#.sort_values(by=['No.SO'])
df=df.groupby(['Ext.Doc.No', 'No.SO','Item No', 'UOM'])['Quantity'].sum().reset_index().sort_values(by=['Ext.Doc.No','No.SO', 'Item No'])
df['NameFromTaco']=df['Item No']
# print(df)
# df['FullName'] = inteminvdf.loc[df['Item No'],'FullName']
# df=df.merge(inteminvdf, how="left")
df=df.merge(inteminvdf, left_on=df['NameFromTaco'].str.upper(), right_on=inteminvdf['NameFromTaco'].str.upper(), how="left")
print(df['FullName'].isnull())
print(df['FullName'])
# print(df)
if df['FullName'].isnull().sum() > 0:
print("Cannot Find Item FullName")
listitemNoFullName = df.loc[df['FullName'].isnull()].values.tolist()
df=df.reindex(columns=['Ext.Doc.No', 'No.SO', 'Item No', 'FullName', 'Quantity', 'UOM'])
# print(df)
# print(listitemNoFullName)
# print(listitemNoFullName)
return False, listitemNoFullName
else:
df=df.groupby(['Ext.Doc.No', 'No.SO','Item No', 'UOM', 'FullName'])['Quantity'].sum().reset_index().sort_values(by=['Ext.Doc.No','No.SO', 'Item No'])
df=df.reindex(columns=['Ext.Doc.No', 'No.SO', 'Item No', 'FullName', 'Quantity', 'UOM'])
# print(df)
lst = df.to_dict('records')
# print(lst)
DeliveryNotedict['lines']=lst
# print(f'pdfexcel4DNwithxlrd.py->DeliveryNotedict:{DeliveryNotedict}')
return True, DeliveryNotedict
if __name__=="__main__":
# filename = "DN_Excel_files\TCO-DNPG-2307-00048.xls"
# read_DN_excel(filename)
# filename = "DN_Excel_files\TCO-DNPG-2307-00079.xls"
# read_DN_excel(filename)
# filename = "DN_Excel_files\TCO-DNPR-2305-00305.xls"
# read_DN_excel(filename)
# filename = "DN_Excel_files\TCO-DNPG-2307-01407.xls"
filename = "DN_Excel_files\TCO-DNPG-2310-00337.xls"
status, retdict = read_DN_excel(filename)
print(status)
print(retdict)