#!/usr/bin/python

import os, csv, sys
from GotermSummarization import *

# Input data files: the GO ontology structure and the gene/GO-term associations.
weightGographData = 'newWeightedPubMedGO.xml'
geneGotermAssociationData = 'gene_association.sgd'

# Build the GoGraph object from the two data files.
# Note: a new GoGraph object has to be created every time
# gotermSummarization() is used.
G = GoGraph(
    weightGographData,
    geneGotermAssociationData,
)

# The list of genes to be summarized (one entry per gene name).
GeneList = [
    'ADH4', 'AFR1', 'AGA1', 'AGA2', 'AQR1', 'ARO10', 'ASG7', 'ASP3-3',
    'AXL1', 'BAR1', 'BEM2', 'BNA1', 'CDC20', 'CDC24', 'CHS1', 'CHS7',
    'CIK1', 'CLB1', 'CPR8', 'DDR48', 'ECM18', 'ENA1', 'ERG24', 'ERG6',
    'FAA3', 'FAR1', 'FIG1', 'FIG2', 'FIT2', 'FRE2', 'FUS1', 'FUS2',
    'FUS3', 'GAS2', 'GAT1', 'GFA1', 'GIC2', 'GIT1', 'GPA1', 'GSC2',
    'GYP8', 'HOF1', 'HOR2', 'HO', 'HSP31', 'HST3', 'HYM1', 'ICL1',
    'ICS2', 'IDH1', 'KAR4', 'KAR5', 'KRE6', 'KSS1', 'KTR2', 'LEU2',
    'LSB3', 'LYS2', 'MDJ2', 'MET1', 'MID2', 'MNN1', 'MNT2', 'MRH1',
    'MSB2', 'MSG5', 'NDE1', 'NDJ1', 'NRM1', 'PCL2', 'PCL7', 'PCL9',
    'PET9', 'PGU1', 'PHD1', 'PHO89', 'PRM10', 'PRM1', 'PRM2', 'PRM3',
    'PRM4', 'PRM5', 'PRM6', 'PRM8', 'PRM9', 'PRP39', 'PRY2', 'PST1',
    'RAX2', 'RDI1', 'RHR2', 'SAM35', 'SCW10', 'SIL1', 'SLI15', 'SRL1',
    'SRL3', 'SST2', 'STE12', 'STE2', 'STE4', 'SUP45', 'SVS1', 'TAT1',
    'TGS1', 'TIP1', 'TYE7', 'UTR2', 'WSC2', 'WSC3', 'YAR009C', 'YAR068W',
    'YBR012W-B', 'YBR071W', 'YCL056C', 'YDR124W', 'YDR249C', 'YDR366C',
    'YER138C', 'YER158C', 'YER160C', 'YGR122W', 'YHB1', 'YIL080W',
    'YIL082W-A', 'YIL083C', 'YIL169C', 'YJR027W', 'YJR029W', 'YLL054C',
    'YLR042C', 'YLR108C', 'YLR414C', 'YML039W', 'YML119W', 'YMR045C',
    'YMR050C', 'YNL208W', 'YRO2', 'ZIP1',
]


# Use GO terms to summarize the list of genes.
# NOTE(review): earlier documentation of this call stated a minimum of 5
# genes while the call passes 3 -- confirm which value is intended.
Result = G.gotermSummarization(
    GeneList,
    0.05,  # threshold of the P_Value of a GO term node in the final result
    3,     # minimum number of genes in a GO term node in the final result
)

# The result has the format:
#   [value_0, {Goterm_1: [a list of genes_1, value_1],
#              Goterm_2: [a list of genes_2, value_2], ...}]
# value_0: the total information lost by the summarization.
# Goterm_1: ID of a GO term that is used to summarize the given list of genes.
# a list of genes_1: the subset of the given genes that are annotated by Goterm_1.
# value_1: the level of Goterm_1 in the GO ontology. The root node is at level 1.



# Print the result.
# Each print below takes one pre-formatted string in parentheses, so it emits
# the same text whether it is parsed as a Python 2 print statement or as a
# Python 3 print() call (the bare `print a, b` form is a SyntaxError on
# Python 3).
print('Total information lost is %s' % (Result[0],))

# Result[1] is indexed by GO term; walk the (term, entry) pairs directly
# rather than looking each key up again inside the loop.
for goterm, summary in Result[1].items():
    print('%s %s' % (goterm, summary))