# Python program to # access Excel files # Import required library # import xlwings as xw import pandas as pd # from openpyxl import load_workbook # import openpyxl import xlrd import configparser import os config = configparser.ConfigParser() config.read('ItemReceipt.ini') item_inventory_path = 'ItemInventory\ItemInventory_FromQB.xlsx' if 'Section 1' in config: # print('ada') if 'Item_inventory_path' in config['Section 1']: # print(config['Section 1']['Item_inventory_path']) item_inventory_path = config['Section 1']['item_inventory_path'] print(f'pdfexcel4DNwithxlrd->item_inventory_path:{item_inventory_path}') # config['Section 1']['key1'] = 'myval1' #change any key or add new one if not there # with open('ItemReceipt.ini', 'w') as configfile: # saving to the configfile # config.write(configfile) def read_DN_excel(filename): # filename="TCO-DNPG-2307-00752.xls" # wb = openpyxl.load_workbook(filename, data_only=True) wb = xlrd.open_workbook(filename) print(wb) print(wb.sheets) # for sheet in wb.worksheets: # print(sheet) # for sheet in wb.sheets(): # print(sheet.name, sheet.nrows, sheet.ncols) # for row in range(0, sheet.nrows): # header = sheet.row_values(row, start_colx=0, end_colx=None) # print(header) # Opening an excel file # wb = xw.Book('example_001.xlsx') # wb = xw.Book("TCO-DNPG-2307-00752.xlsx") # inteminvdf = pd.read_excel('ItemInventory140723.xlsx', index_col="NameFromTaco", usecols=['FullName', 'NameFromTaco']) # inteminvdf = pd.read_excel('ItemInventory140723.xlsx', usecols=['FullName', 'NameFromTaco']) # print(os.getcwd(), os.path.join(os.getcwd(), item_inventory_path)) inteminvdf = pd.read_excel(os.path.join(os.getcwd(), item_inventory_path), usecols=['FullName', 'NameFromTaco']) # print(inteminvdf) # print (inteminvdf.index) # print(inteminvdf.loc['TS-W981', 'FullName']) # Viewing available # sheets in it # wks = xw.sheets # print("Available sheets :\n", wks) # print(type(wks), f'count:{len(wks)}') # # Selecting a sheet # ws = wks[0] # Selecting a value # from the selected sheet # val = ws.range("C1").value # print("A value in sheet1 :", val) # Automatic table # detection from # a cell # automatic = ws.range("a10").expand().value # print("Automatic Table :", automatic) # linescount=wb.sheets[0].range('A' + str(wb.sheets[0].cells.last_cell.row)).end('up').row # print(linescount) data=[] firstpage=True boldataline=False # rawdata=wks[0].range("c1").current_region.value # print(rawdata) # DNRefNum = rawdata[1][3].strip().split("-")[1][-1] + "".join(rawdata[1][3].strip().split("-")[-2:]) # TxnDate = rawdata[2][3].strip().split(" ")[-1] _y=1 _x=0 for col in range(1,wb.sheets()[0].ncols): col_value = wb.sheets()[0].col_values(col) print("col=",col, col_value[1]) if "TCO-DN" in col_value[1]: _x=col if _x == 0 : return False, "Cannot find TCO-DN cell" # if "TCO-DN" in wb.sheets()[0].cell(_y, _x).value.strip(): print(wb.sheets()[0].cell(_y, _x).value.strip()) DNRefNum = wb.sheets()[0].cell(_y, _x).value.strip().split("-")[1][-1] + "".join(wb.sheets()[0].cell(_y, _x).value.strip().split("-")[-2:]) # print(DNRefNum) TxnDate = wb.sheets()[0].cell(_y+1, _x).value.strip().split(" ")[-1] Memo = DNRefNum print(f'pdfexcel4DNwithxlrd.py->DNRefNum:{DNRefNum}, TxnDate:{TxnDate}') DeliveryNotedict={'DNRefNum':DNRefNum, 'TxnDate':TxnDate, 'Memo': Memo} wks=wb.sheets() for sheet in wks: # rawdata=ws.range("c1").current_region.value # print(rawdata) # print() irow=1 movetonextpage=False for idxrow in range(0, sheet.nrows): row = sheet.row_values(idxrow, start_colx=0, end_colx=None) # print(row) # for row in rawdata: # print(f'irow={irow}', row) irow+=1 # print() if not None and row[0]=="Item No" : if firstpage: # print('firstpage') if len(row)==5: data.append(row) elif len(row)>5: temp_ =[] for _ in row: if _ != None: temp_.append(_) if len(row)==5: data.append(temp_) else: print("HEADER ERROR!!!") data.append(['Item No', 'Description', 'QuantityUOM', 'No.SO', 'LPN No.']) boldataline=True firstpage=False # continue else: # print('Not firstpage') # data.append(row) boldataline=True firstpage=False elif not movetonextpage: # print(boldataline, row) if boldataline: if row[0] is not None: if not None and row[0].startswith('Jenis Barang'): # print('ada Jenis Barang') pass elif row[0].startswith('Sub Total') and not None: # print('sub total found') boldataline=False movetonextpage=True continue elif row[3] is not None: # if row[3].startswith('Hormat'): if 'Hormat Kami,' in row: # print('hormatkami found') boldataline=False movetonextpage=True continue else: data.append(row) else: data.append(row) elif row[3] is not None: # if row[3].startswith('Hormat'): if 'Hormat Kami,' in row: boldataline=False movetonextpage=True # continue else: data.append(row) else: data.append(row) print (f'data: {data}') for idx, x in enumerate(data): for colidx, col in enumerate(x): ### change the empty cell into None if col == "": data[idx][colidx]=None coly=0 lenList = 0 xylist=[] for idx, x in enumerate(data): print(idx, x) xylist=[] if len(x)>5 and (idx % 2)==1 : print(idx,x) lenList = len(x) for idy, y in enumerate(x): if y == None: coly=idy else: xylist.append(y) if len(xylist)==5: data[idx]=xylist elif len(x)>5 and (idx % 2)==0 and coly==0: pass print("Different page, seconde line have different column width (6)") boloddcolumn=True for _ in x[:len(x)-2]: print(_) if _ != None: boloddcolumn=False print(boloddcolumn) if boloddcolumn: del x[0] print(x) elif coly != 0 and lenList == len(x): for idy, y in enumerate(x): if idy!=coly: xylist.append(y) data[idx]=xylist coly=0 lenList = 0 print(idx, data[idx]) print(f'len data={len(data)}') for idx, x in enumerate(data): # print(idx, x) if x[0] !=None and x[0].upper() == "ET-06/A1.": ### Change the source from "ET-06/A1." to "ET-06/A1.BOX_100" and replace the UOM from "BOX_100" to "BOX" print(idx, x) x[0] = "ET-06/A1.BOX_100" x[2] = x[2].replace("BOX_100", "BOX") print(idx, x) newdata=[] templist=[] for idx, dt in enumerate(data): if dt[0] != 'Item No': # print(idx,'not item', dt) if dt[0] is None: if dt[1]: templist[1]+=" " + dt[1] if dt[3]: templist.append(dt[3]) templist[3]=templist[3].split("/")[0].strip() # templist[3]+=" " + dt[3] if dt[4]: templist[4]+=" " + dt[4] newdata.append(templist) templist=[] else: uom=dt[2].split(" ")[-1] # print(uom) dt.append(uom) dt[2]=int(dt[2].split(".")[0]) templist=dt # print(templist) for idx, x in enumerate(newdata): pass print(idx, x) print(f'len newdata={len(newdata)}') df=pd.DataFrame(newdata, columns=['Item No', 'Description', 'Quantity', 'No.SO', 'LPN No.', 'UOM', 'Ext.Doc.No'])#, columns=data[0]+"UOM") # print(df) # df=df.groupby(['No.SO','Item No', 'UOM'])['Quantity'].sum().reset_index().sort_values(by=['Item No', 'No.SO'])#.sort_values(by=['No.SO']) df=df.groupby(['Ext.Doc.No', 'No.SO','Item No', 'UOM'])['Quantity'].sum().reset_index().sort_values(by=['Ext.Doc.No','No.SO', 'Item No']) df['NameFromTaco']=df['Item No'] # print(df) # df['FullName'] = inteminvdf.loc[df['Item No'],'FullName'] # df=df.merge(inteminvdf, how="left") df=df.merge(inteminvdf, left_on=df['NameFromTaco'].str.upper(), right_on=inteminvdf['NameFromTaco'].str.upper(), how="left") print(df['FullName'].isnull()) print(df['FullName']) # print(df) if df['FullName'].isnull().sum() > 0: print("Cannot Find Item FullName") listitemNoFullName = df.loc[df['FullName'].isnull()].values.tolist() df=df.reindex(columns=['Ext.Doc.No', 'No.SO', 'Item No', 'FullName', 'Quantity', 'UOM']) # print(df) # print(listitemNoFullName) # print(listitemNoFullName) return False, listitemNoFullName else: df=df.groupby(['Ext.Doc.No', 'No.SO','Item No', 'UOM', 'FullName'])['Quantity'].sum().reset_index().sort_values(by=['Ext.Doc.No','No.SO', 'Item No']) df=df.reindex(columns=['Ext.Doc.No', 'No.SO', 'Item No', 'FullName', 'Quantity', 'UOM']) # print(df) lst = df.to_dict('records') # print(lst) DeliveryNotedict['lines']=lst # print(f'pdfexcel4DNwithxlrd.py->DeliveryNotedict:{DeliveryNotedict}') return True, DeliveryNotedict if __name__=="__main__": # filename = "DN_Excel_files\TCO-DNPG-2307-00048.xls" # read_DN_excel(filename) # filename = "DN_Excel_files\TCO-DNPG-2307-00079.xls" # read_DN_excel(filename) # filename = "DN_Excel_files\TCO-DNPR-2305-00305.xls" # read_DN_excel(filename) # filename = "DN_Excel_files\TCO-DNPG-2307-01407.xls" filename = "DN_Excel_files\TCO-DNPG-2310-00337.xls" status, retdict = read_DN_excel(filename) print(status) print(retdict)