-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmain.py
More file actions
73 lines (65 loc) · 2.65 KB
/
main.py
File metadata and controls
73 lines (65 loc) · 2.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from nltk import word_tokenize, pos_tag
from nltk.corpus import state_union
from nltk.tokenize import PunktSentenceTokenizer as punk
from api import *
# Input text file to analyse; replace FILENAME with the real name (keep .txt).
# NOTE(review): the leading/trailing spaces inside both literals become part of
# the path/name -- presumably unintentional placeholders; confirm before use.
file_name = " FILENAME.txt " #FILENAME with .txt
# Output HTML document name, given without the .html extension (per the comment).
htmlname = " HTMLNAME " #Target HTML name without .html
################# Functions #####################
def readfile(file):
    """Read and return the full contents of *file*.

    Parameters:
        file: path to a readable text file.

    Returns:
        The file's contents as a string, or the sentinel string "File"
        when the file cannot be opened -- downstream code tokenizes the
        sentinel instead of crashing.
    """
    try:
        with open(file, "r") as f:
            return f.read()
    except OSError:
        # Narrowed from a bare `except:`; only I/O errors mean "missing file".
        print("Input File could not be found!")
        return "File"  # fallback sentinel so the pipeline still runs
def printtokenlist(tokened): #input sentence-tokenized list
    """Extract keyword tokens from sentence-tokenized text, preserving order.

    Parameters:
        tokened: list of sentence strings (output of a sentence tokenizer).

    Returns:
        De-duplicated list of words, in first-seen order, whose POS tag is
        in the module-level ``keyparam`` list and that are longer than one
        character. Returns [] if tokenizing/tagging fails partway through
        (original contract: discard partial results on error).
    """
    keywords = []
    seen = set()  # O(1) dedup check instead of scanning `keywords` per word
    try:
        for sentence in tokened:
            words = word_tokenize(sentence)     # word-tokenize each sentence
            for word, tag in pos_tag(words):    # POS tag for each word
                # keep tagged keywords, dropping single chars and duplicates
                if tag in keyparam and len(word) > 1 and word not in seen:
                    seen.add(word)
                    keywords.append(word)
    except Exception as e:
        # preserve original behaviour: any failure yields an empty list
        print(str(e))
        keywords = []
    return keywords
def printtoken(tokened): #input sentence-tokenized list
    """Collect keyword tokens from sentence-tokenized text as a set.

    Same filtering as printtokenlist (tag in ``keyparam``, length > 1) but
    returns an unordered set; on any failure an empty set is returned.
    """
    found = set()
    try:
        for sentence in tokened:
            tagged_words = pos_tag(word_tokenize(sentence))
            found.update(
                word
                for word, tag in tagged_words
                if tag in keyparam and len(word) > 1
            )
    except Exception as err:
        print(str(err))
        found = set()
    return found
# POS tags to keep as keywords; only proper nouns (NNP) are selected here.
# Historical note (original comment): 'IN' was removed from the set because it
# pulled in prepositions such as 'at'.
# The commented-out list below is the full candidate tag set for reference.
#keyparam=['CD','FW','NN','NNP','NNS','NP','NPS','VB','VBD','VBG','VBN','VBP','VBZ','VH','VHD','VHG','VHN','VHP','VHZ','VV','VVD','VVG','VVN','VVP','VVZ','WP','WP$','WRB']
keyparam=["NNP"]
"""
#Check 'posid.txt' to select key param for reference to types of keywords to include eg. nouns,verbs
"""
#################### MAIN #####################
# Pipeline: read text -> sentence-tokenize -> select NNP keywords ->
# fetch a question/answer per keyword into an HTML document.
sample_text = readfile(file_name)
tokenizer = punk()
print("Tokenizing...")
sentences = tokenizer.tokenize(sample_text)
print("Selecting keys...")
keys = printtokenlist(sentences)  # filtered keywords in first-seen order
print(keys)
html_doc = CreateHTML(htmlname)  # object handling file output and queries
print("Fetching request...")
for keyword in keys:
    print(f"Fetching {keyword}...")
    html_doc.addquestion(keyword)  # fetch question and answer for this keyword
html_doc.endquestion()  # close the HTML document