Document (#16514)

Author
Paijmans, H.
Title
Gravity wells of meaning : detecting information rich passages in scientific texts
Source
Journal of documentation. 53(1997) no.5, S.520-536
Year
1997
Abstract
Presents research in which 4 term weighting schemes were used to detect information-rich passages in texts, and compares the results. Demonstrates that word categories and frequency-derived weights have a close correlation, but that weighting according to the first mention theory or the cue method shows no correlation with frequency-based weights.
Content
Vgl. auch unter: http://www.emeraldinsight.com/10.1108/EUM0000000007209.
Theme
Volltextretrieval

Similar documents (content)

  1. Li, X.; Zhang, A.; Li, C.; Ouyang, J.; Cai, Y.: Exploring coherent topics by topic modeling with term weighting (2018) 0.30
    0.29753876 = sum of:
      0.29753876 = product of:
        0.82649654 = sum of:
          0.010880623 = weight(abstract_txt:that in 45) [ClassicSimilarity], result of:
            0.010880623 = score(doc=45,freq=4.0), product of:
              0.036806498 = queryWeight, product of:
                2.3649352 = idf(docFreq=11344, maxDocs=44421)
                0.0155634275 = queryNorm
              0.2956169 = fieldWeight in 45, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                2.3649352 = idf(docFreq=11344, maxDocs=44421)
                0.0625 = fieldNorm(doc=45)
          0.039263092 = weight(abstract_txt:term in 45) [ClassicSimilarity], result of:
            0.039263092 = score(doc=45,freq=3.0), product of:
              0.07564518 = queryWeight, product of:
                1.0137092 = boost
                4.794713 = idf(docFreq=998, maxDocs=44421)
                0.0155634275 = queryNorm
              0.5190429 = fieldWeight in 45, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                4.794713 = idf(docFreq=998, maxDocs=44421)
                0.0625 = fieldNorm(doc=45)
          0.02585169 = weight(abstract_txt:compared in 45) [ClassicSimilarity], result of:
            0.02585169 = score(doc=45,freq=1.0), product of:
              0.08257044 = queryWeight, product of:
                1.0590954 = boost
                5.0093837 = idf(docFreq=805, maxDocs=44421)
                0.0155634275 = queryNorm
              0.31308648 = fieldWeight in 45, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.0093837 = idf(docFreq=805, maxDocs=44421)
                0.0625 = fieldNorm(doc=45)
          0.05728367 = weight(abstract_txt:word in 45) [ClassicSimilarity], result of:
            0.05728367 = score(doc=45,freq=3.0), product of:
              0.09730731 = queryWeight, product of:
                1.149729 = boost
                5.4380693 = idf(docFreq=524, maxDocs=44421)
                0.0155634275 = queryNorm
              0.58868825 = fieldWeight in 45, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                5.4380693 = idf(docFreq=524, maxDocs=44421)
                0.0625 = fieldNorm(doc=45)
          0.060367428 = weight(abstract_txt:schemes in 45) [ClassicSimilarity], result of:
            0.060367428 = score(doc=45,freq=3.0), product of:
              0.10076894 = queryWeight, product of:
                1.1700007 = boost
                5.5339513 = idf(docFreq=476, maxDocs=44421)
                0.0155634275 = queryNorm
              0.5990678 = fieldWeight in 45, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                5.5339513 = idf(docFreq=476, maxDocs=44421)
                0.0625 = fieldNorm(doc=45)
          0.17087258 = weight(abstract_txt:weighting in 45) [ClassicSimilarity], result of:
            0.17087258 = score(doc=45,freq=6.0), product of:
              0.16004047 = queryWeight, product of:
                1.4744759 = boost
                6.9740796 = idf(docFreq=112, maxDocs=44421)
                0.0155634275 = queryNorm
              1.0676836 = fieldWeight in 45, product of:
                2.4494898 = tf(freq=6.0), with freq of:
                  6.0 = termFreq=6.0
                6.9740796 = idf(docFreq=112, maxDocs=44421)
                0.0625 = fieldNorm(doc=45)
          0.10422763 = weight(abstract_txt:texts in 45) [ClassicSimilarity], result of:
            0.10422763 = score(doc=45,freq=2.0), product of:
              0.20916414 = queryWeight, product of:
                2.3838634 = boost
                5.6376824 = idf(docFreq=429, maxDocs=44421)
                0.0155634275 = queryNorm
              0.49830544 = fieldWeight in 45, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                5.6376824 = idf(docFreq=429, maxDocs=44421)
                0.0625 = fieldNorm(doc=45)
          0.08659143 = weight(abstract_txt:frequency in 45) [ClassicSimilarity], result of:
            0.08659143 = score(doc=45,freq=1.0), product of:
              0.23289415 = queryWeight, product of:
                2.5154579 = boost
                5.948895 = idf(docFreq=314, maxDocs=44421)
                0.0155634275 = queryNorm
              0.37180594 = fieldWeight in 45, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.948895 = idf(docFreq=314, maxDocs=44421)
                0.0625 = fieldNorm(doc=45)
          0.2711584 = weight(abstract_txt:weights in 45) [ClassicSimilarity], result of:
            0.2711584 = score(doc=45,freq=3.0), product of:
              0.34563437 = queryWeight, product of:
                3.0644054 = boost
                7.2471204 = idf(docFreq=85, maxDocs=44421)
                0.0155634275 = queryNorm
              0.7845238 = fieldWeight in 45, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                7.2471204 = idf(docFreq=85, maxDocs=44421)
                0.0625 = fieldNorm(doc=45)
        0.36 = coord(9/25)
    
  2. Dang, E.K.F.; Luk, R.W.P.; Allan, J.; Ho, K.S.; Chung, K.F.L.; Lee, D.L.: ¬A new context-dependent term weight computed by boost and discount using relevance information (2010) 0.21
    0.2124412 = sum of:
      0.2124412 = product of:
        0.7587186 = sum of:
          0.059975363 = weight(abstract_txt:term in 120) [ClassicSimilarity], result of:
            0.059975363 = score(doc=120,freq=7.0), product of:
              0.07564518 = queryWeight, product of:
                1.0137092 = boost
                4.794713 = idf(docFreq=998, maxDocs=44421)
                0.0155634275 = queryNorm
              0.7928511 = fieldWeight in 120, product of:
                2.6457512 = tf(freq=7.0), with freq of:
                  7.0 = termFreq=7.0
                4.794713 = idf(docFreq=998, maxDocs=44421)
                0.0625 = fieldNorm(doc=120)
          0.0082325535 = weight(abstract_txt:information in 120) [ClassicSimilarity], result of:
            0.0082325535 = score(doc=120,freq=2.0), product of:
              0.038505405 = queryWeight, product of:
                1.0228186 = boost
                2.4188995 = idf(docFreq=10748, maxDocs=44421)
                0.0155634275 = queryNorm
              0.21380253 = fieldWeight in 120, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                2.4188995 = idf(docFreq=10748, maxDocs=44421)
                0.0625 = fieldNorm(doc=120)
          0.02585169 = weight(abstract_txt:compared in 120) [ClassicSimilarity], result of:
            0.02585169 = score(doc=120,freq=1.0), product of:
              0.08257044 = queryWeight, product of:
                1.0590954 = boost
                5.0093837 = idf(docFreq=805, maxDocs=44421)
                0.0155634275 = queryNorm
              0.31308648 = fieldWeight in 120, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.0093837 = idf(docFreq=805, maxDocs=44421)
                0.0625 = fieldNorm(doc=120)
          0.029642014 = weight(abstract_txt:according in 120) [ClassicSimilarity], result of:
            0.029642014 = score(doc=120,freq=1.0), product of:
              0.09045595 = queryWeight, product of:
                1.1085144 = boost
                5.2431293 = idf(docFreq=637, maxDocs=44421)
                0.0155634275 = queryNorm
              0.32769558 = fieldWeight in 120, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.2431293 = idf(docFreq=637, maxDocs=44421)
                0.0625 = fieldNorm(doc=120)
          0.06975844 = weight(abstract_txt:weighting in 120) [ClassicSimilarity], result of:
            0.06975844 = score(doc=120,freq=1.0), product of:
              0.16004047 = queryWeight, product of:
                1.4744759 = boost
                6.9740796 = idf(docFreq=112, maxDocs=44421)
                0.0155634275 = queryNorm
              0.43587998 = fieldWeight in 120, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.9740796 = idf(docFreq=112, maxDocs=44421)
                0.0625 = fieldNorm(doc=120)
          0.12245877 = weight(abstract_txt:frequency in 120) [ClassicSimilarity], result of:
            0.12245877 = score(doc=120,freq=2.0), product of:
              0.23289415 = queryWeight, product of:
                2.5154579 = boost
                5.948895 = idf(docFreq=314, maxDocs=44421)
                0.0155634275 = queryNorm
              0.525813 = fieldWeight in 120, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                5.948895 = idf(docFreq=314, maxDocs=44421)
                0.0625 = fieldNorm(doc=120)
          0.44279978 = weight(abstract_txt:weights in 120) [ClassicSimilarity], result of:
            0.44279978 = score(doc=120,freq=8.0), product of:
              0.34563437 = queryWeight, product of:
                3.0644054 = boost
                7.2471204 = idf(docFreq=85, maxDocs=44421)
                0.0155634275 = queryNorm
              1.281122 = fieldWeight in 120, product of:
                2.828427 = tf(freq=8.0), with freq of:
                  8.0 = termFreq=8.0
                7.2471204 = idf(docFreq=85, maxDocs=44421)
                0.0625 = fieldNorm(doc=120)
        0.28 = coord(7/25)
    
  3. Wong, S.K.M.; Yao, Y.Y.: ¬An information-theoretic measure of term specifics (1992) 0.19
    0.18903741 = sum of:
      0.18903741 = product of:
        0.59074193 = sum of:
          0.011540643 = weight(abstract_txt:that in 4806) [ClassicSimilarity], result of:
            0.011540643 = score(doc=4806,freq=2.0), product of:
              0.036806498 = queryWeight, product of:
                2.3649352 = idf(docFreq=11344, maxDocs=44421)
                0.0155634275 = queryNorm
              0.31354907 = fieldWeight in 4806, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                2.3649352 = idf(docFreq=11344, maxDocs=44421)
                0.09375 = fieldNorm(doc=4806)
          0.0832896 = weight(abstract_txt:term in 4806) [ClassicSimilarity], result of:
            0.0832896 = score(doc=4806,freq=6.0), product of:
              0.07564518 = queryWeight, product of:
                1.0137092 = boost
                4.794713 = idf(docFreq=998, maxDocs=44421)
                0.0155634275 = queryNorm
              1.1010563 = fieldWeight in 4806, product of:
                2.4494898 = tf(freq=6.0), with freq of:
                  6.0 = termFreq=6.0
                4.794713 = idf(docFreq=998, maxDocs=44421)
                0.09375 = fieldNorm(doc=4806)
          0.01234883 = weight(abstract_txt:information in 4806) [ClassicSimilarity], result of:
            0.01234883 = score(doc=4806,freq=2.0), product of:
              0.038505405 = queryWeight, product of:
                1.0228186 = boost
                2.4188995 = idf(docFreq=10748, maxDocs=44421)
                0.0155634275 = queryNorm
              0.3207038 = fieldWeight in 4806, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                2.4188995 = idf(docFreq=10748, maxDocs=44421)
                0.09375 = fieldNorm(doc=4806)
          0.041024324 = weight(abstract_txt:shows in 4806) [ClassicSimilarity], result of:
            0.041024324 = score(doc=4806,freq=1.0), product of:
              0.08572986 = queryWeight, product of:
                1.0791674 = boost
                5.104322 = idf(docFreq=732, maxDocs=44421)
                0.0155634275 = queryNorm
              0.47853017 = fieldWeight in 4806, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.104322 = idf(docFreq=732, maxDocs=44421)
                0.09375 = fieldNorm(doc=4806)
          0.052279726 = weight(abstract_txt:schemes in 4806) [ClassicSimilarity], result of:
            0.052279726 = score(doc=4806,freq=1.0), product of:
              0.10076894 = queryWeight, product of:
                1.1700007 = boost
                5.5339513 = idf(docFreq=476, maxDocs=44421)
                0.0155634275 = queryNorm
              0.51880795 = fieldWeight in 4806, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.5339513 = idf(docFreq=476, maxDocs=44421)
                0.09375 = fieldNorm(doc=4806)
          0.05859066 = weight(abstract_txt:derived in 4806) [ClassicSimilarity], result of:
            0.05859066 = score(doc=4806,freq=1.0), product of:
              0.1087235 = queryWeight, product of:
                1.2153027 = boost
                5.7482243 = idf(docFreq=384, maxDocs=44421)
                0.0155634275 = queryNorm
              0.538896 = fieldWeight in 4806, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.7482243 = idf(docFreq=384, maxDocs=44421)
                0.09375 = fieldNorm(doc=4806)
          0.14797997 = weight(abstract_txt:weighting in 4806) [ClassicSimilarity], result of:
            0.14797997 = score(doc=4806,freq=2.0), product of:
              0.16004047 = queryWeight, product of:
                1.4744759 = boost
                6.9740796 = idf(docFreq=112, maxDocs=44421)
                0.0155634275 = queryNorm
              0.924641 = fieldWeight in 4806, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                6.9740796 = idf(docFreq=112, maxDocs=44421)
                0.09375 = fieldNorm(doc=4806)
          0.18368815 = weight(abstract_txt:frequency in 4806) [ClassicSimilarity], result of:
            0.18368815 = score(doc=4806,freq=2.0), product of:
              0.23289415 = queryWeight, product of:
                2.5154579 = boost
                5.948895 = idf(docFreq=314, maxDocs=44421)
                0.0155634275 = queryNorm
              0.7887195 = fieldWeight in 4806, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                5.948895 = idf(docFreq=314, maxDocs=44421)
                0.09375 = fieldNorm(doc=4806)
        0.32 = coord(8/25)
    
  4. Mengle, S.; Goharian, N.: Passage detection using text classification (2009) 0.18
    0.1773736 = sum of:
      0.1773736 = product of:
        0.886868 = sum of:
          0.009520545 = weight(abstract_txt:that in 3765) [ClassicSimilarity], result of:
            0.009520545 = score(doc=3765,freq=4.0), product of:
              0.036806498 = queryWeight, product of:
                2.3649352 = idf(docFreq=11344, maxDocs=44421)
                0.0155634275 = queryNorm
              0.2586648 = fieldWeight in 3765, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                2.3649352 = idf(docFreq=11344, maxDocs=44421)
                0.0546875 = fieldNorm(doc=3765)
          0.00882243 = weight(abstract_txt:information in 3765) [ClassicSimilarity], result of:
            0.00882243 = score(doc=3765,freq=3.0), product of:
              0.038505405 = queryWeight, product of:
                1.0228186 = boost
                2.4188995 = idf(docFreq=10748, maxDocs=44421)
                0.0155634275 = queryNorm
              0.22912185 = fieldWeight in 3765, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                2.4188995 = idf(docFreq=10748, maxDocs=44421)
                0.0546875 = fieldNorm(doc=3765)
          0.043268014 = weight(abstract_txt:categories in 3765) [ClassicSimilarity], result of:
            0.043268014 = score(doc=3765,freq=3.0), product of:
              0.08821942 = queryWeight, product of:
                1.0947245 = boost
                5.177905 = idf(docFreq=680, maxDocs=44421)
                0.0155634275 = queryNorm
              0.49045905 = fieldWeight in 3765, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                5.177905 = idf(docFreq=680, maxDocs=44421)
                0.0546875 = fieldNorm(doc=3765)
          0.064304225 = weight(abstract_txt:detect in 3765) [ClassicSimilarity], result of:
            0.064304225 = score(doc=3765,freq=1.0), product of:
              0.1656989 = queryWeight, product of:
                1.5003154 = boost
                7.0962973 = idf(docFreq=99, maxDocs=44421)
                0.0155634275 = queryNorm
              0.38807875 = fieldWeight in 3765, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.0962973 = idf(docFreq=99, maxDocs=44421)
                0.0546875 = fieldNorm(doc=3765)
          0.7609528 = weight(abstract_txt:passages in 3765) [ClassicSimilarity], result of:
            0.7609528 = score(doc=3765,freq=14.0), product of:
              0.44981343 = queryWeight, product of:
                3.4958594 = boost
                8.267481 = idf(docFreq=30, maxDocs=44421)
                0.0155634275 = queryNorm
              1.6917076 = fieldWeight in 3765, product of:
                3.7416575 = tf(freq=14.0), with freq of:
                  14.0 = termFreq=14.0
                8.267481 = idf(docFreq=30, maxDocs=44421)
                0.0546875 = fieldNorm(doc=3765)
        0.2 = coord(5/25)
    
  5. Stamatatos, E.: Plagiarism detection using stopword n-grams (2011) 0.16
    0.1610881 = sum of:
      0.1610881 = product of:
        0.6712004 = sum of:
          0.011778619 = weight(abstract_txt:that in 955) [ClassicSimilarity], result of:
            0.011778619 = score(doc=955,freq=3.0), product of:
              0.036806498 = queryWeight, product of:
                2.3649352 = idf(docFreq=11344, maxDocs=44421)
                0.0155634275 = queryNorm
              0.32001466 = fieldWeight in 955, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                2.3649352 = idf(docFreq=11344, maxDocs=44421)
                0.078125 = fieldNorm(doc=955)
          0.007276618 = weight(abstract_txt:information in 955) [ClassicSimilarity], result of:
            0.007276618 = score(doc=955,freq=1.0), product of:
              0.038505405 = queryWeight, product of:
                1.0228186 = boost
                2.4188995 = idf(docFreq=10748, maxDocs=44421)
                0.0155634275 = queryNorm
              0.18897653 = fieldWeight in 955, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                2.4188995 = idf(docFreq=10748, maxDocs=44421)
                0.078125 = fieldNorm(doc=955)
          0.03231461 = weight(abstract_txt:compared in 955) [ClassicSimilarity], result of:
            0.03231461 = score(doc=955,freq=1.0), product of:
              0.08257044 = queryWeight, product of:
                1.0590954 = boost
                5.0093837 = idf(docFreq=805, maxDocs=44421)
                0.0155634275 = queryNorm
              0.3913581 = fieldWeight in 955, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.0093837 = idf(docFreq=805, maxDocs=44421)
                0.078125 = fieldNorm(doc=955)
          0.09186318 = weight(abstract_txt:detect in 955) [ClassicSimilarity], result of:
            0.09186318 = score(doc=955,freq=1.0), product of:
              0.1656989 = queryWeight, product of:
                1.5003154 = boost
                7.0962973 = idf(docFreq=99, maxDocs=44421)
                0.0155634275 = queryNorm
              0.55439824 = fieldWeight in 955, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.0962973 = idf(docFreq=99, maxDocs=44421)
                0.078125 = fieldNorm(doc=955)
          0.11709149 = weight(abstract_txt:detecting in 955) [ClassicSimilarity], result of:
            0.11709149 = score(doc=955,freq=1.0), product of:
              0.19479398 = queryWeight, product of:
                1.6267115 = boost
                7.694134 = idf(docFreq=54, maxDocs=44421)
                0.0155634275 = queryNorm
              0.60110426 = fieldWeight in 955, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.694134 = idf(docFreq=54, maxDocs=44421)
                0.078125 = fieldNorm(doc=955)
          0.41087586 = weight(abstract_txt:passages in 955) [ClassicSimilarity], result of:
            0.41087586 = score(doc=955,freq=2.0), product of:
              0.44981343 = queryWeight, product of:
                3.4958594 = boost
                8.267481 = idf(docFreq=30, maxDocs=44421)
                0.0155634275 = queryNorm
              0.9134362 = fieldWeight in 955, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                8.267481 = idf(docFreq=30, maxDocs=44421)
                0.078125 = fieldNorm(doc=955)
        0.24 = coord(6/25)