Document (#5814)

Author
Damerau, F.J.
Title
Generating and evaluating domain-oriented multi-word terms from texts
Source
Information processing and management. 29(1993) no.4, S.433-447
Year
1993
Abstract
Examines techniques for automatically generating domain vocabularies from large text collections. Focuses on the problem of generating multi-word vocabulary terms (specifically pairs). Discusses statistical issues associated with word co-occurrences likely to be of use in a natural language interface. Provides a more objective evaluation of the selection procedures. As substantial experimentation with subjects using a working query system is absent, all evaluation is necessarily subjective. Uses a surrogate for experimentation by relying on pre-existing dictionaries as indicators of domain relevance.
Theme
Automatisches Indexieren

Similar documents (content)

  1. Spiteri, L.F.: Word association testing and thesaurus construction : a pilot study (2005) 0.21
    0.2100063 = sum of:
      0.2100063 = product of:
        0.8750262 = sum of:
          0.06842075 = weight(abstract_txt:indicators in 216) [ClassicSimilarity], result of:
            0.06842075 = score(doc=216,freq=1.0), product of:
              0.12106899 = queryWeight, product of:
                1.0868161 = boost
                6.0281444 = idf(docFreq=290, maxDocs=44421)
                0.018479628 = queryNorm
              0.5651385 = fieldWeight in 216, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.0281444 = idf(docFreq=290, maxDocs=44421)
                0.09375 = fieldNorm(doc=216)
          0.09772786 = weight(abstract_txt:pairs in 216) [ClassicSimilarity], result of:
            0.09772786 = score(doc=216,freq=1.0), product of:
              0.15355122 = queryWeight, product of:
                1.2239572 = boost
                6.7888126 = idf(docFreq=135, maxDocs=44421)
                0.018479628 = queryNorm
              0.6364512 = fieldWeight in 216, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.7888126 = idf(docFreq=135, maxDocs=44421)
                0.09375 = fieldNorm(doc=216)
          0.05841476 = weight(abstract_txt:terms in 216) [ClassicSimilarity], result of:
            0.05841476 = score(doc=216,freq=2.0), product of:
              0.108957246 = queryWeight, product of:
                1.4580843 = boost
                4.043712 = idf(docFreq=2116, maxDocs=44421)
                0.018479628 = queryNorm
              0.53612554 = fieldWeight in 216, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.043712 = idf(docFreq=2116, maxDocs=44421)
                0.09375 = fieldNorm(doc=216)
          0.09908957 = weight(abstract_txt:domain in 216) [ClassicSimilarity], result of:
            0.09908957 = score(doc=216,freq=1.0), product of:
              0.2235116 = queryWeight, product of:
                2.557704 = boost
                4.7288613 = idf(docFreq=1066, maxDocs=44421)
                0.018479628 = queryNorm
              0.44333076 = fieldWeight in 216, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.7288613 = idf(docFreq=1066, maxDocs=44421)
                0.09375 = fieldNorm(doc=216)
          0.26100752 = weight(abstract_txt:word in 216) [ClassicSimilarity], result of:
            0.26100752 = score(doc=216,freq=3.0), product of:
              0.29558092 = queryWeight, product of:
                2.941294 = boost
                5.4380693 = idf(docFreq=524, maxDocs=44421)
                0.018479628 = queryNorm
              0.8830324 = fieldWeight in 216, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                5.4380693 = idf(docFreq=524, maxDocs=44421)
                0.09375 = fieldNorm(doc=216)
          0.2903658 = weight(abstract_txt:generating in 216) [ClassicSimilarity], result of:
            0.2903658 = score(doc=216,freq=1.0), product of:
              0.45769736 = queryWeight, product of:
                3.6600702 = boost
                6.7669935 = idf(docFreq=138, maxDocs=44421)
                0.018479628 = queryNorm
              0.6344056 = fieldWeight in 216, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.7669935 = idf(docFreq=138, maxDocs=44421)
                0.09375 = fieldNorm(doc=216)
        0.24 = coord(6/25)
    
  2. Yang, C.C.; Li, K.W.: Automatic construction of English/Chinese parallel corpora (2003) 0.12
    0.120196134 = sum of:
      0.120196134 = product of:
        0.50081724 = sum of:
          0.03109113 = weight(abstract_txt:statistical in 2683) [ClassicSimilarity], result of:
            0.03109113 = score(doc=2683,freq=1.0), product of:
              0.102499284 = queryWeight, product of:
                5.5466094 = idf(docFreq=470, maxDocs=44421)
                0.018479628 = queryNorm
              0.3033302 = fieldWeight in 2683, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.5466094 = idf(docFreq=470, maxDocs=44421)
                0.0546875 = fieldNorm(doc=2683)
          0.034986906 = weight(abstract_txt:objective in 2683) [ClassicSimilarity], result of:
            0.034986906 = score(doc=2683,freq=1.0), product of:
              0.11089199 = queryWeight, product of:
                1.0401349 = boost
                5.7692223 = idf(docFreq=376, maxDocs=44421)
                0.018479628 = queryNorm
              0.31550434 = fieldWeight in 2683, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.7692223 = idf(docFreq=376, maxDocs=44421)
                0.0546875 = fieldNorm(doc=2683)
          0.08062137 = weight(abstract_txt:pairs in 2683) [ClassicSimilarity], result of:
            0.08062137 = score(doc=2683,freq=2.0), product of:
              0.15355122 = queryWeight, product of:
                1.2239572 = boost
                6.7888126 = idf(docFreq=135, maxDocs=44421)
                0.018479628 = queryNorm
              0.52504545 = fieldWeight in 2683, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                6.7888126 = idf(docFreq=135, maxDocs=44421)
                0.0546875 = fieldNorm(doc=2683)
          0.12011873 = weight(abstract_txt:dictionaries in 2683) [ClassicSimilarity], result of:
            0.12011873 = score(doc=2683,freq=3.0), product of:
              0.1749833 = queryWeight, product of:
                1.3065857 = boost
                7.2471204 = idf(docFreq=85, maxDocs=44421)
                0.018479628 = queryNorm
              0.6864583 = fieldWeight in 2683, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                7.2471204 = idf(docFreq=85, maxDocs=44421)
                0.0546875 = fieldNorm(doc=2683)
          0.08174472 = weight(abstract_txt:domain in 2683) [ClassicSimilarity], result of:
            0.08174472 = score(doc=2683,freq=2.0), product of:
              0.2235116 = queryWeight, product of:
                2.557704 = boost
                4.7288613 = idf(docFreq=1066, maxDocs=44421)
                0.018479628 = queryNorm
              0.3657292 = fieldWeight in 2683, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.7288613 = idf(docFreq=1066, maxDocs=44421)
                0.0546875 = fieldNorm(doc=2683)
          0.15225439 = weight(abstract_txt:word in 2683) [ClassicSimilarity], result of:
            0.15225439 = score(doc=2683,freq=3.0), product of:
              0.29558092 = queryWeight, product of:
                2.941294 = boost
                5.4380693 = idf(docFreq=524, maxDocs=44421)
                0.018479628 = queryNorm
              0.5151022 = fieldWeight in 2683, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                5.4380693 = idf(docFreq=524, maxDocs=44421)
                0.0546875 = fieldNorm(doc=2683)
        0.24 = coord(6/25)
    
  3. Huo, W.: Automatic multi-word term extraction and its application to Web-page summarization (2012) 0.12
    0.120133825 = sum of:
      0.120133825 = product of:
        0.75083643 = sum of:
          0.06884246 = weight(abstract_txt:terms in 1563) [ClassicSimilarity], result of:
            0.06884246 = score(doc=1563,freq=4.0), product of:
              0.108957246 = queryWeight, product of:
                1.4580843 = boost
                4.043712 = idf(docFreq=2116, maxDocs=44421)
                0.018479628 = queryNorm
              0.63183004 = fieldWeight in 1563, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                4.043712 = idf(docFreq=2116, maxDocs=44421)
                0.078125 = fieldNorm(doc=1563)
          0.26717302 = weight(abstract_txt:multi in 1563) [ClassicSimilarity], result of:
            0.26717302 = score(doc=1563,freq=6.0), product of:
              0.2350624 = queryWeight, product of:
                2.1416385 = boost
                5.9394164 = idf(docFreq=317, maxDocs=44421)
                0.018479628 = queryNorm
              1.1366047 = fieldWeight in 1563, product of:
                2.4494898 = tf(freq=6.0), with freq of:
                  6.0 = termFreq=6.0
                5.9394164 = idf(docFreq=317, maxDocs=44421)
                0.078125 = fieldNorm(doc=1563)
          0.082574636 = weight(abstract_txt:domain in 1563) [ClassicSimilarity], result of:
            0.082574636 = score(doc=1563,freq=1.0), product of:
              0.2235116 = queryWeight, product of:
                2.557704 = boost
                4.7288613 = idf(docFreq=1066, maxDocs=44421)
                0.018479628 = queryNorm
              0.36944228 = fieldWeight in 1563, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.7288613 = idf(docFreq=1066, maxDocs=44421)
                0.078125 = fieldNorm(doc=1563)
          0.3322463 = weight(abstract_txt:word in 1563) [ClassicSimilarity], result of:
            0.3322463 = score(doc=1563,freq=7.0), product of:
              0.29558092 = queryWeight, product of:
                2.941294 = boost
                5.4380693 = idf(docFreq=524, maxDocs=44421)
                0.018479628 = queryNorm
              1.1240451 = fieldWeight in 1563, product of:
                2.6457512 = tf(freq=7.0), with freq of:
                  7.0 = termFreq=7.0
                5.4380693 = idf(docFreq=524, maxDocs=44421)
                0.078125 = fieldNorm(doc=1563)
        0.16 = coord(4/25)
    
  4. Stoykova, V.; Petkova, E.: Automatic extraction of mathematical terms for precalculus (2012) 0.12
    0.115045376 = sum of:
      0.115045376 = product of:
        0.47935575 = sum of:
          0.053299077 = weight(abstract_txt:statistical in 1156) [ClassicSimilarity], result of:
            0.053299077 = score(doc=1156,freq=1.0), product of:
              0.102499284 = queryWeight, product of:
                5.5466094 = idf(docFreq=470, maxDocs=44421)
                0.018479628 = queryNorm
              0.5199946 = fieldWeight in 1156, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.5466094 = idf(docFreq=470, maxDocs=44421)
                0.09375 = fieldNorm(doc=1156)
          0.054365847 = weight(abstract_txt:oriented in 1156) [ClassicSimilarity], result of:
            0.054365847 = score(doc=1156,freq=1.0), product of:
              0.10386243 = queryWeight, product of:
                1.0066276 = boost
                5.58337 = idf(docFreq=453, maxDocs=44421)
                0.018479628 = queryNorm
              0.52344096 = fieldWeight in 1156, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.58337 = idf(docFreq=453, maxDocs=44421)
                0.09375 = fieldNorm(doc=1156)
          0.062143054 = weight(abstract_txt:evaluating in 1156) [ClassicSimilarity], result of:
            0.062143054 = score(doc=1156,freq=1.0), product of:
              0.11354537 = queryWeight, product of:
                1.0525054 = boost
                5.8378363 = idf(docFreq=351, maxDocs=44421)
                0.018479628 = queryNorm
              0.5472971 = fieldWeight in 1156, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.8378363 = idf(docFreq=351, maxDocs=44421)
                0.09375 = fieldNorm(doc=1156)
          0.12784727 = weight(abstract_txt:occurrences in 1156) [ClassicSimilarity], result of:
            0.12784727 = score(doc=1156,freq=1.0), product of:
              0.18366878 = queryWeight, product of:
                1.3386198 = boost
                7.4248013 = idf(docFreq=71, maxDocs=44421)
                0.018479628 = queryNorm
              0.69607514 = fieldWeight in 1156, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.4248013 = idf(docFreq=71, maxDocs=44421)
                0.09375 = fieldNorm(doc=1156)
          0.08261095 = weight(abstract_txt:terms in 1156) [ClassicSimilarity], result of:
            0.08261095 = score(doc=1156,freq=4.0), product of:
              0.108957246 = queryWeight, product of:
                1.4580843 = boost
                4.043712 = idf(docFreq=2116, maxDocs=44421)
                0.018479628 = queryNorm
              0.758196 = fieldWeight in 1156, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                4.043712 = idf(docFreq=2116, maxDocs=44421)
                0.09375 = fieldNorm(doc=1156)
          0.09908957 = weight(abstract_txt:domain in 1156) [ClassicSimilarity], result of:
            0.09908957 = score(doc=1156,freq=1.0), product of:
              0.2235116 = queryWeight, product of:
                2.557704 = boost
                4.7288613 = idf(docFreq=1066, maxDocs=44421)
                0.018479628 = queryNorm
              0.44333076 = fieldWeight in 1156, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.7288613 = idf(docFreq=1066, maxDocs=44421)
                0.09375 = fieldNorm(doc=1156)
        0.24 = coord(6/25)
    
  5. He, Q.: ¬A study of the strength indexes in co-word analysis (2000) 0.11
    0.11334755 = sum of:
      0.11334755 = product of:
        0.5667378 = sum of:
          0.06721287 = weight(abstract_txt:likely in 1111) [ClassicSimilarity], result of:
            0.06721287 = score(doc=1111,freq=2.0), product of:
              0.107230954 = queryWeight, product of:
                1.0228211 = boost
                5.673189 = idf(docFreq=414, maxDocs=44421)
                0.018479628 = queryNorm
              0.62680477 = fieldWeight in 1111, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                5.673189 = idf(docFreq=414, maxDocs=44421)
                0.078125 = fieldNorm(doc=1111)
          0.141058 = weight(abstract_txt:pairs in 1111) [ClassicSimilarity], result of:
            0.141058 = score(doc=1111,freq=3.0), product of:
              0.15355122 = queryWeight, product of:
                1.2239572 = boost
                6.7888126 = idf(docFreq=135, maxDocs=44421)
                0.018479628 = queryNorm
              0.9186381 = fieldWeight in 1111, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                6.7888126 = idf(docFreq=135, maxDocs=44421)
                0.078125 = fieldNorm(doc=1111)
          0.10653939 = weight(abstract_txt:occurrences in 1111) [ClassicSimilarity], result of:
            0.10653939 = score(doc=1111,freq=1.0), product of:
              0.18366878 = queryWeight, product of:
                1.3386198 = boost
                7.4248013 = idf(docFreq=71, maxDocs=44421)
                0.018479628 = queryNorm
              0.5800626 = fieldWeight in 1111, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.4248013 = idf(docFreq=71, maxDocs=44421)
                0.078125 = fieldNorm(doc=1111)
          0.03442123 = weight(abstract_txt:terms in 1111) [ClassicSimilarity], result of:
            0.03442123 = score(doc=1111,freq=1.0), product of:
              0.108957246 = queryWeight, product of:
                1.4580843 = boost
                4.043712 = idf(docFreq=2116, maxDocs=44421)
                0.018479628 = queryNorm
              0.31591502 = fieldWeight in 1111, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.043712 = idf(docFreq=2116, maxDocs=44421)
                0.078125 = fieldNorm(doc=1111)
          0.21750627 = weight(abstract_txt:word in 1111) [ClassicSimilarity], result of:
            0.21750627 = score(doc=1111,freq=3.0), product of:
              0.29558092 = queryWeight, product of:
                2.941294 = boost
                5.4380693 = idf(docFreq=524, maxDocs=44421)
                0.018479628 = queryNorm
              0.73586035 = fieldWeight in 1111, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                5.4380693 = idf(docFreq=524, maxDocs=44421)
                0.078125 = fieldNorm(doc=1111)
        0.2 = coord(5/25)