Wednesday, June 29, 2022

Create an index for a text file (idea from "Nine Algorithms That Changed the Future" - Google PageRank)

# Contents of Names.txt

Abdul Abdul Babu Geetha Joseph Leela Mary Mohammed Muhammed Suresh

Abdul Rajan Thomas Geetha Leela Mini Omana Rajesh Usha

Abdul Biju Fathima Geetha Muhammad Sindhu


# Name of the input text file that gets indexed.
filename = 'Names.txt'

# Name of the scratch file that receives the cleaned-up text.
temp = 'Temp.txt'

# Cleaned-up text; starts empty and is extended by Eliminate().
S = str()

def Eliminate(): # Add only alphabets, space & newline to string S
  """Append a cleaned-up copy of the input file's text to the global ``S``.

  Reads the whole file named by the module-level ``filename`` and keeps
  only alphabetic characters, spaces and newlines.  A hyphen is replaced
  by a space, so a hyphenated word becomes two words; every other
  character is dropped.

  The result is appended to ``S`` (not assigned), so repeated calls
  accumulate text.
  """
  global S

  # ``with`` closes the file even if read() raises.
  with open(filename) as inn:
    text = inn.read()

  kept = []
  for ch in text:
    if ch.isalpha() or ch == ' ' or ch == '\n':
      kept.append(ch)
    elif ch == '-':  # Hyphenated words become 2 words
      kept.append(' ')

  # Join once instead of growing the string one character at a time.
  S += ''.join(kept)


def WriteToTemp(): # Write S to Temporary file
  """Write the global string ``S`` to the file named by ``temp``.

  The file is opened in ``'w'`` mode, so any previous content of that
  file is replaced.
  """
  # ``with`` closes the file even if write() raises.
  with open(temp, 'w') as out:
    out.write(S)


def Indexing(): # 1st Indexing method
  """Print, for every word in the temporary file, the lines it occurs on.

  Reads the file named by the module-level ``temp`` line by line (lines
  are numbered from 1, blank lines included), splits each line on
  whitespace and prints one ``word [line numbers]`` entry per distinct
  word.  A line number is listed once per word even when the word is
  repeated on that line.  Words are printed in order of first appearance.

  NOTE: a second function named ``Indexing`` is defined later in this
  file and replaces this one when the module is loaded.
  """
  Index = {}    # File index: word -> list of line numbers

  # ``with`` makes sure the file is closed once it has been read.
  with open(temp) as inn:
    for lineNo, line in enumerate(inn, start=1):
      # dict.fromkeys() drops repeated words but keeps first-seen order,
      # so each line number is recorded at most once per word.
      for word in dict.fromkeys(line.split()):
        Index.setdefault(word, []).append(lineNo)

  for word, lines in Index.items():
    print(word, lines)


def Indexing(): # 2nd Indexing method
  """Print, for every word in the temporary file, the lines it occurs on.

  Reads the file named by the module-level ``temp`` line by line (lines
  are numbered from 1, blank lines included), splits each line on
  whitespace and prints one ``word {line numbers}`` entry per distinct
  word.  Line numbers are collected in a set, so a word repeated on one
  line contributes that line only once.  Words are printed in order of
  first appearance.

  NOTE: this definition replaces the earlier function of the same name
  in this file.
  """
  D = {}  # word -> set of line numbers

  # ``with`` makes sure the file is closed once it has been read.
  with open(temp) as inn:
    for lineNo, line in enumerate(inn, start=1):
      for word in line.split():  # Build index
        D.setdefault(word, set()).add(lineNo)

  for word, lines in D.items():
    print(word, lines)


# Step 1: read the input file and collect the cleaned-up text in S.
Eliminate()

# Step 2: save the cleaned-up text to the temporary file.
WriteToTemp()

# Step 3: build the word index from the temporary file and print it.
Indexing()


No comments:

Post a Comment