I have a string
"bat and ball not pen or boat not phone"
I want to pick words adjacent to not
for example -- "not pen", "not phone"
but I was unable to do it. I have tried to pick out the word by using the index and Substring, but could not get it to work.
// Asker's attempt: trim tempTerm, then keep its first
// (tempTerm.Length - (orterm.Length + 1)) characters.
// NOTE(review): the length is read from the untrimmed tempTerm, so the requested
// length can exceed the trimmed string when Trim() removed characters - confirm intent.
tempTerm = tempTerm.Trim().Substring(0, tempTerm.Length - (orterm.Length + 1)).ToString();
How about using some Regex
Something like
// Sentence to scan for "not <word>" phrases.
string s = "bat and ball not pen or boat not phone";
// \b stops "not" from matching the tail of a longer word (e.g. "knot pen"),
// and \s+ tolerates more than one whitespace character before the next word.
Regex reg = new Regex(@"\bnot\s+\w+");
MatchCollection matches = reg.Matches(s);
foreach (Match match in matches)
{
    // Each match is the whole phrase, e.g. "not pen".
    string sub = match.Value;
}
See Learn Regular Expression (Regex) syntax with C# and .NET for some more details
You can split the sentence, and then just loop through looking for "not":
// Break the sentence into tokens on single spaces.
string sentence = "bat and ball not pen or boat not phone";
string[] words = sentence.Split(' ');
List<string> wordsBesideNot = new List<string>();
// Single pass over the tokens: remember whether the previous token was "not";
// when it was, the current token is the word that follows it.
bool previousWasNot = false;
foreach (string current in words)
{
    if (previousWasNot)
    {
        wordsBesideNot.Add(current);
    }
    previousWasNot = current.Equals("not");
}
// Result: wordsBesideNot holds { "pen", "phone" }
string[] parts = myStr.Split(' ');
// Stop one short of the end so that parts[next] always exists.
for (int current = 0, next = 1; next < parts.Length; current++, next++)
{
    if (parts[current] == "not")
    {
        someList.Add(parts[next]);
    }
}
This should get you all the words that directly follow "not"; you could use a case-insensitive comparison if need be.
You can use this regex: not\s\w+\b
It will match the desired phrases:
not pen
not phone
I'd say start by splitting your string into an array - it will make this kind of thing a whole lot easier.
In C# I would do something like this:
// Original string
string s = "bat and ball not pen or boat not phone";
// Separator that precedes every word we want
string separator = "not ";
// Length of the separator
int length = separator.Length;
// Working copy; strings are immutable, so reassigning sCopy never touches s.
string sCopy = s;
// List to store the words; you could use an array if
// you count the 'not's first.
List<string> stringList = new List<string>();
// Look the separator up once per iteration. Ordinal comparison keeps the
// search culture-independent.
int index;
while ((index = sCopy.IndexOf(separator, System.StringComparison.Ordinal)) != -1)
{
    // Remove everything up to and including the separator, then drop
    // any extra spaces in front of the next word.
    sCopy = sCopy.Substring(index + length).TrimStart(' ');
    // The word ends at the next space, or at the end of the string.
    int wordEnd = sCopy.IndexOf(' ');
    string sub = wordEnd == -1 ? sCopy : sCopy.Substring(0, wordEnd);
    // A trailing "not " leaves nothing behind it; do not record an empty word.
    if (sub.Length > 0)
    {
        stringList.Add(sub);
    }
}
The words in the list are pen and phone. This will fail when you get odd characters, full stops etc. If you don't know how the string is going to be constructed you might need something more complex.
public class StringHelper
{
    /// <summary>
    /// Gets the surrounding words of a given word in a given text.
    /// The text is split on whitespace and the search word is compared against
    /// whole tokens using exact (case-sensitive) equality.
    /// </summary>
    /// <param name="text">The text in which to search for the given word.</param>
    /// <param name="word">The word to search for in the given text.</param>
    /// <param name="prev">The maximum number of preceding words to include; clamped to what is available before each match.</param>
    /// <param name="next">The maximum number of following words to include; clamped to what is available after each match.</param>
    /// <param name="all">When true, every occurrence of the word is returned; otherwise only the first.</param>
    /// <returns>A list of phrases, each consisting of the search word and its surrounding words joined by single spaces. Empty when the word is not found.</returns>
    public static List<string> GetSurroundingWords(string text, string word, int prev, int next, bool all = false)
    {
        var phrases = new List<string>();
        var words = text.Split();
        var index = -1;
        while ((index = Array.IndexOf(words, word, index + 1)) != -1)
        {
            // Clamp per match into locals: clamping the parameters themselves would
            // shrink the window for every later match as well.
            var before = Math.Max(0, Math.Min(prev, index));
            // Only (Length - index - 1) words follow the match.
            var after = Math.Max(0, Math.Min(next, words.Length - index - 1));

            // The phrase is the contiguous token range [index - before, index + after].
            phrases.Add(string.Join(" ", words, index - before, before + 1 + after));

            if (!all)
                break;
        }
        return phrases;
    }
}
[TestClass]
public class StringHelperTests
{
    // Sample sentence searched by every test in this class.
    private const string Text = "Date and Time in C# are handled by DateTime class in C# that provides properties and methods to format dates in different datetime formats.";

    /// <summary>One preceding and two following words are returned around the first match.</summary>
    [TestMethod]
    public void GetSurroundingWords()
    {
        // Arrange
        const string searchWord = "class";
        string[] expectedPhrases = { "DateTime class in C#" };

        // Act
        List<string> result = StringHelper.GetSurroundingWords(Text, searchWord, 1, 2);

        // Assert
        Assert.AreEqual(expectedPhrases.Length, result.Count);
        Assert.AreEqual(expectedPhrases[0], result[0]);
    }

    /// <summary>A word that is not a token of the text yields an empty result.</summary>
    [TestMethod]
    public void GetSurroundingWords_NoMatch()
    {
        // Arrange
        const string searchWord = "classify";
        List<string> expectedPhrases = new List<string>();

        // Act
        List<string> result = StringHelper.GetSurroundingWords(Text, searchWord, 1, 2);

        // Assert
        Assert.AreEqual(expectedPhrases.Count, result.Count);
    }

    /// <summary>Asking for more preceding words than exist returns everything from the start of the text.</summary>
    [TestMethod]
    public void GetSurroundingWords_MoreSurroundingWordsThanAvailable()
    {
        // Arrange
        const string searchWord = "class";
        string expectedPhrase = "Date and Time in C# are handled by DateTime class in C#";

        // Act
        List<string> result = StringHelper.GetSurroundingWords(Text, searchWord, 50, 2);

        // Assert
        Assert.AreEqual(expectedPhrase.Length, result[0].Length);
        Assert.AreEqual(expectedPhrase, result[0]);
    }

    /// <summary>With zero words requested on both sides only the search word itself is returned.</summary>
    [TestMethod]
    public void GetSurroundingWords_ZeroSurroundingWords()
    {
        // Arrange
        const string searchWord = "class";
        string expectedPhrase = "class";

        // Act
        List<string> result = StringHelper.GetSurroundingWords(Text, searchWord, 0, 0);

        // Assert
        Assert.AreEqual(expectedPhrase.Length, result[0].Length);
        Assert.AreEqual(expectedPhrase, result[0]);
    }

    /// <summary>With all = true every occurrence of the search word produces its own phrase, in text order.</summary>
    [TestMethod]
    public void GetSurroundingWords_AllInstancesOfSearchWord()
    {
        // Arrange
        const string searchWord = "and";
        string[] expectedPhrases = { "Date and Time", "properties and methods" };

        // Act
        List<string> result = StringHelper.GetSurroundingWords(Text, searchWord, 1, 1, true);

        // Assert
        Assert.AreEqual(expectedPhrases.Length, result.Count);
        Assert.AreEqual(expectedPhrases[0], result[0]);
        Assert.AreEqual(expectedPhrases[1], result[1]);
    }
}
精彩评论