开发者

finding the word at a position in javascript

开发者 https://www.devze.com 2023-02-14 16:08 出处:网络
For string input of 'this is a sentence' it must return 'is' when position is 6 or 7. When position is 0, 1, 2, 3 or 4 result must be 'this'.

For string input of 'this is a sentence' it must return 'is' when position is 6 or 7. When position is 0, 1, 2, 3 or 4 result must be 'this'.

What is the easiest way?


/**
 * Returns the whitespace-delimited word that contains the character at `pos`.
 *
 * Any whitespace character (space, tab, newline, ...) separates words.
 *
 * @param {*} str - Text to search; converted with String().
 * @param {*} pos - Zero-based character index; converted to an unsigned
 *   32-bit integer.
 * @returns {string} The word at `pos`, or "" when `pos` is on whitespace or
 *   no word ends at or before `pos`. A `pos` past the end of a string that
 *   ends in a word yields that last word.
 */
function getWordAt (str, pos) {

    // Perform type conversions.
    const text = String(str);
    const index = Number(pos) >>> 0;

    // Search for the word's beginning and end.
    // `left` is where the run of non-whitespace ending at `index` starts;
    // `right` is the offset (relative to `index`) of the next whitespace.
    const left = text.slice(0, index + 1).search(/\S+$/);
    const right = text.slice(index).search(/\s/);

    // No word ends at `index`. Without this guard a negative `left` would be
    // read by slice() as "count from the end" and leak the last character
    // when `index` lies past a string that ends in whitespace.
    if (left < 0) {
        return "";
    }

    // The last word in the string is a special case.
    if (right < 0) {
        return text.slice(left);
    }

    // Return the word, using the located bounds to extract it from the string.
    return text.slice(left, right + index);

}

This function accepts any whitespace character as a word separator, including spaces, tabs, and newlines. Essentially, it looks:

  • For the beginning of the word, matched by /\S+$/
  • Just past the end of the word, using /\s/

As written, the function will return "" if the index of a whitespace character is given; spaces are not part of words themselves. If you want the function to instead return the preceding word, change /\S+$/ to /\S+\s*$/ (keep the $ anchor so the match still ends at the given position).


Here is some example output for "This is a sentence."

0: This
1: This
2: This
3: This
4:
5: is
6: is
7:
8: a
9:
10: sentence.
// ...
18: sentence.

Modified to return the preceding word, the output becomes:

0: This
1: This
2: This
3: This
4: This
5: is
6: is
7: is
8: a
9: a
10: sentence.
// ...
18: sentence.


I had some strange behavior in the most popular answer at time of writing with getting the word if the position is at the last character of a word that isn't the last word.

Here's my rendition:

  • Instead of returning the word, I return the start and end indexes for more flexibility.
  • I use a regular expression to detect the whitespace. This offers versatility under different locales and is still relatively performant because only 1 character is checked.
  • Note: When the position has spaces on either side of it (or the beginning/end of the string), the function returns [position, position]. This makes sense actually because the position does not have a word: it is a 0-length word.
/**
 * Finds the bounds of the whitespace-delimited word touching `position`.
 *
 * The scan walks left from the character before `position` and right from
 * the character at `position`, stopping at whitespace or the string edges.
 *
 * @param {string} str - Text to inspect.
 * @param {number} position - Cursor index; values outside [0, str.length]
 *   are clamped into that range.
 * @returns {number[]} `[start, end]`, suitable for `str.substring(start, end)`.
 *   When whitespace (or a string edge) lies on both sides of the position the
 *   result is `[position, position]`, i.e. a zero-length word.
 */
function getWordBoundsAtPosition(str, position) {
  const isSpace = (c) => /\s/.test(c);

  // Clamp first: an unclamped position past the end would make the backward
  // scan step through non-existent indexes and return an `end` larger than
  // str.length.
  const anchor = Math.min(Math.max(position, 0), str.length);

  // Walk left to the whitespace (or string start) preceding the word.
  let start = anchor - 1;
  while (start >= 0 && !isSpace(str[start])) {
    start -= 1;
  }
  start += 1; // step back onto the first character of the word

  // Walk right to the whitespace (or string end) following the word.
  let end = anchor;
  while (end < str.length && !isSpace(str[end])) {
    end += 1;
  }

  return [start, end];
}

To plug into substring, just deconstruct the returned bounds.

// Sample text and cursor index; index 7 is the space directly after "is".
const myString = 'This is a sentence.';
const position = 7;

// The function returns a two-element array; destructure it into start/end.
const [start, end] = getWordBoundsAtPosition(myString, position);
// substring(start, end) takes the characters from start up to (not including) end.
const wordAtPosition = myString.substring(start, end); // => 'is'

Cool Visualization

I created a visualization of where the bounds returned by this method are in your string below:

/**
 * Finds the bounds of the whitespace-delimited word touching `position`.
 *
 * @param {string} str - Text to inspect.
 * @param {number} position - Cursor index; values outside [0, str.length]
 *   are clamped into that range.
 * @returns {number[]} `[start, end]`, suitable for `str.substring(start, end)`.
 *   A position with whitespace (or a string edge) on both sides yields
 *   `[position, position]`.
 */
function getWordBoundsAtPosition(str, position) {
  const isSpace = (c) => /\s/.test(c);

  // Clamp first so an out-of-range position cannot produce bounds outside
  // the string or a needlessly long backward scan.
  const anchor = Math.min(Math.max(position, 0), str.length);

  // Walk left to the whitespace (or string start) preceding the word.
  let start = anchor - 1;
  while (start >= 0 && !isSpace(str[start])) {
    start -= 1;
  }
  start += 1; // step back onto the first character of the word

  // Walk right to the whitespace (or string end) following the word.
  let end = anchor;
  while (end < str.length && !isSpace(str[end])) {
    end += 1;
  }

  return [start, end];
}

/**
 * Writes the demo read-outs for one string/cursor combination into the page.
 *
 * Fills, by element id: "cursor-position", "fn-call", "fn-result",
 * "substring-call", "substring-result" and the three-line "viz" diagram
 * (index ruler, quoted string, arrows under the two bounds).
 *
 * @param {string} str - Text being analysed.
 * @param {number[]} bounds - `[start, end]` pair to display.
 * @param {number} cursorIdx - Cursor index to display.
 */
function analyzeStringWithCursor(str, bounds, cursorIdx) {
  const [from, to] = bounds;
  const show = (id, text) => {
    document.getElementById(id).textContent = text;
  };

  show("cursor-position", String(cursorIdx));
  show("fn-call", `getWordBoundsAtPosition("${str}", ${cursorIdx})`);
  show("fn-result", `[${from}, ${to}]`);
  show("substring-call", `"${str}".substring(${from}, ${to})`);
  show("substring-result", `"${str.substring(from, to)}"`);

  // Ruler: the digits 0-9 repeated often enough to cover the string, then
  // trimmed to its exact length. The leading space lines it up with the
  // opening quote printed on the next row.
  const digitRuns = Math.floor((str.length - 1) / 10) + 1;
  const ruler = "0123456789".repeat(digitRuns).substring(0, str.length);
  const arrows = " ".repeat(from) + "↗" + " ".repeat(to - from) + "↖";
  show("viz", [` ${ruler}`, `"${str}"`, arrows].join("\n"));
}

// Render the initial state: empty text, zero-length bounds, cursor at 0.
analyzeStringWithCursor("", [0, 0], 0);

// Event handler: re-render from the event target's current text and caret.
const update = ({ target }) => {
  const text = target.value;
  const caret = target.selectionStart;
  analyzeStringWithCursor(text, getWordBoundsAtPosition(text, caret), caret);
};

// Refresh after a key is released and after a mouse click in the input.
for (const eventName of ['keyup', 'click']) {
  document.getElementById("input").addEventListener(eventName, update);
}
<p>Type some words below. Your cursor position will be used as the position argument to the function.</p>
<input id="input" placeholder="Start typing some words..." />
<pre id="viz" style="font-size: 2rem; margin: 0.25rem"></pre>
<ul>
  <li>Cursor Position: <code id="cursor-position"></code></li>
  <li><code id="fn-call"></code>: <code id="fn-result"></code></li>
  <li><code id="substring-call"></code>: <code id="substring-result"></code></li>
</ul>


var str = "this is a sentence";

/**
 * Returns the space-delimited word surrounding index `pos` of `str`.
 *
 * The text is split at `pos`; the result is the tail of the left part after
 * its last space joined with the head of the right part before its first
 * space. Only the space character (" ") is treated as a separator.
 *
 * @param {string} str - Text to search.
 * @param {number} pos - Zero-based split index.
 * @returns {string} The word around `pos`; "" when `pos` sits between two
 *   spaces.
 */
function GetWordByPos(str, pos) {
    const left = str.substring(0, pos);
    const right = str.slice(pos);

    // Index searches instead of the former /^.+ / and / .+$/ patterns: those
    // required at least one character next to the space (so a lone leading or
    // trailing space leaked into the result) and could not cross newlines.
    const wordStart = left.lastIndexOf(" ") + 1; // 0 when there is no space
    const spaceAfter = right.indexOf(" ");
    const wordEnd = spaceAfter === -1 ? right.length : spaceAfter;

    return left.slice(wordStart) + right.slice(0, wordEnd);
}

// Demo call: index 6 of "this is a sentence" is the "s" of "is", so this alerts "is".
alert(GetWordByPos(str, 6));

P.S. Not tested thoroughly and no error handling yet.


/**
 * Returns the space-delimited word at `pos`, snapping left to the preceding
 * word when `pos` points at a space.
 *
 * Only the space character (" ") is treated as a separator.
 *
 * @param {string} s - Text to search.
 * @param {number} pos - Zero-based character index.
 * @returns {string} The word at (or just before) `pos`; "" when there is no
 *   word at or to the left of `pos`, or when `pos` is negative.
 */
function getWordAt(s, pos) {
  // make the index point to a character of the word
  let index = pos;
  while (s[index] === " ") index--;
  // only spaces (or nothing) to the left: there is no word to snap to.
  // Without this check lastIndexOf() clamps the negative index to 0 and the
  // word *after* the leading spaces would be returned instead.
  if (index < 0) return "";
  // find the space before that word
  // (add 1 to be at the beginning of that word)
  // (note that it works even if there is no space before that word)
  const start = s.lastIndexOf(" ", index) + 1;
  // find the end of the word; use the length if it was the last word
  const spaceAfter = s.indexOf(" ", start);
  const end = spaceAfter === -1 ? s.length : spaceAfter;
  // return the result
  return s.substring(start, end);
}
getWordAt("this is a sentence", 4);


/**
 * Returns the space-delimited word at `pos`, snapping one character to the
 * left when `pos` points at a space.
 *
 * Only the space character (" ") is treated as a separator.
 *
 * @param {*} str - Text to search; converted to a string.
 * @param {*} pos - Zero-based character index; parsed with parseInt(pos, 10).
 * @returns {string} The word at `pos`, or "" when `pos` is not a number, is
 *   out of range, or still points at a space after snapping left once.
 */
function getWordAt(str, pos) {

   // Sanitise input
   const text = str + "";
   let index = parseInt(pos, 10);

   // Snap to a word on the left
   if (text[index] === " ") {
      index -= 1;
   }

   // Handle exceptional cases. The upper bound is text.length (not
   // text.length - 1) so that the last character of the string is still a
   // valid position.
   if (Number.isNaN(index) || index < 0 || index >= text.length || text[index] === " ") {
      return "";
   }

   // Locate the word: walk left to its first character, right past its last.
   let start = index;
   while (start > 0 && text[start - 1] !== " ") {
      start--;
   }
   let end = index;
   while (end < text.length && text[end] !== " ") {
      end++;
   }

   return text.slice(start, end);
}

// Demo call: index 6 is the "s" of "is", so this alerts "is".
alert(getWordAt("this is a sentence", 6));

Something like this. Be sure to thoroughly test the loop logic; I didn't.


I ended up with my own solution.

Note 1: the regular expressions are not tested thoroughly.

Note 2: this solution is fast (even for big strings) and works even if position (pos argument) is in the middle of a word.

/**
 * Finds the word around index `pos`, treating whitespace, "." and "," as
 * word delimiters.
 *
 * @param {string} str - Text to search.
 * @param {number} pos - Zero-based index at which the text is split.
 * @returns {{index: number, word: string}} `word` is the run of
 *   non-delimiter characters spanning `pos` (possibly ""); `index` is where
 *   that run starts in `str`.
 */
function getWordByPosition(str, pos) {
  // Both patterns use `*`, so each always matches (possibly the empty
  // string) and the match objects can be read directly.
  const before = str.substring(0, pos).match(/[^.,\s]*$/);
  const after = str.slice(pos).match(/^[^.,\s]*/);

  return {
      index: before.index,
      word: before[0] + after[0]
  };
}
0

精彩评论

暂无评论...
验证码 换一张
取 消